OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
207 class OffsetsVector { | 207 class OffsetsVector { |
208 public: | 208 public: |
209 inline OffsetsVector(int num_registers) | 209 inline OffsetsVector(int num_registers) |
210 : offsets_vector_length_(num_registers) { | 210 : offsets_vector_length_(num_registers) { |
211 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 211 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { |
212 vector_ = NewArray<int>(offsets_vector_length_); | 212 vector_ = NewArray<int>(offsets_vector_length_); |
213 } else { | 213 } else { |
214 vector_ = static_offsets_vector_; | 214 vector_ = static_offsets_vector_; |
215 } | 215 } |
216 } | 216 } |
217 | |
218 | |
219 inline ~OffsetsVector() { | 217 inline ~OffsetsVector() { |
220 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 218 if (offsets_vector_length_ > kStaticOffsetsVectorSize) { |
221 DeleteArray(vector_); | 219 DeleteArray(vector_); |
222 vector_ = NULL; | 220 vector_ = NULL; |
223 } | 221 } |
224 } | 222 } |
225 | 223 inline int* vector() { return vector_; } |
226 | 224 inline int length() { return offsets_vector_length_; } |
227 inline int* vector() { | |
228 return vector_; | |
229 } | |
230 | |
231 | |
232 inline int length() { | |
233 return offsets_vector_length_; | |
234 } | |
235 | 225 |
236 private: | 226 private: |
237 int* vector_; | 227 int* vector_; |
238 int offsets_vector_length_; | 228 int offsets_vector_length_; |
239 static const int kStaticOffsetsVectorSize = 50; | 229 static const int kStaticOffsetsVectorSize = 50; |
240 static int static_offsets_vector_[kStaticOffsetsVectorSize]; | 230 static int static_offsets_vector_[kStaticOffsetsVectorSize]; |
241 }; | 231 }; |
242 | 232 |
243 | 233 |
244 int OffsetsVector::static_offsets_vector_[ | 234 int OffsetsVector::static_offsets_vector_[ |
(...skipping 551 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
796 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | 786 int previous_index = static_cast<int>(DoubleToInteger(index->Number())); |
797 | 787 |
798 #ifdef DEBUG | 788 #ifdef DEBUG |
799 if (FLAG_trace_regexp_bytecodes) { | 789 if (FLAG_trace_regexp_bytecodes) { |
800 String* pattern = regexp->Pattern(); | 790 String* pattern = regexp->Pattern(); |
801 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 791 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
802 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 792 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
803 } | 793 } |
804 #endif | 794 #endif |
805 LOG(RegExpExecEvent(regexp, previous_index, subject)); | 795 LOG(RegExpExecEvent(regexp, previous_index, subject)); |
| 796 |
| 797 if (!subject->IsFlat(StringShape(*subject))) { |
| 798 FlattenString(subject); |
| 799 } |
| 800 |
806 return IrregexpExecOnce(irregexp, | 801 return IrregexpExecOnce(irregexp, |
807 num_captures, | 802 num_captures, |
808 subject, | 803 subject, |
809 previous_index, | 804 previous_index, |
810 offsets.vector(), | 805 offsets.vector(), |
811 offsets.length()); | 806 offsets.length()); |
812 } | 807 } |
813 | 808 |
814 | 809 |
815 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | 810 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, |
(...skipping 14 matching lines...) Expand all Loading... |
830 int previous_index = 0; | 825 int previous_index = 0; |
831 | 826 |
832 Handle<JSArray> result = Factory::NewJSArray(0); | 827 Handle<JSArray> result = Factory::NewJSArray(0); |
833 int i = 0; | 828 int i = 0; |
834 Handle<Object> matches; | 829 Handle<Object> matches; |
835 | 830 |
836 if (!subject->IsFlat(shape)) { | 831 if (!subject->IsFlat(shape)) { |
837 subject->Flatten(shape); | 832 subject->Flatten(shape); |
838 } | 833 } |
839 | 834 |
840 do { | 835 while (true) { |
841 if (previous_index > subject->length() || previous_index < 0) { | 836 if (previous_index > subject->length() || previous_index < 0) { |
842 // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 837 // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
843 // string length, there is no match. | 838 // string length, there is no match. |
844 matches = Factory::null_value(); | 839 matches = Factory::null_value(); |
| 840 return result; |
845 } else { | 841 } else { |
846 #ifdef DEBUG | 842 #ifdef DEBUG |
847 if (FLAG_trace_regexp_bytecodes) { | 843 if (FLAG_trace_regexp_bytecodes) { |
848 String* pattern = regexp->Pattern(); | 844 String* pattern = regexp->Pattern(); |
849 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); | 845 PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); |
850 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 846 PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); |
851 } | 847 } |
852 #endif | 848 #endif |
853 LOG(RegExpExecEvent(regexp, previous_index, subject)); | 849 LOG(RegExpExecEvent(regexp, previous_index, subject)); |
854 matches = IrregexpExecOnce(irregexp, | 850 matches = IrregexpExecOnce(irregexp, |
855 IrregexpNumberOfCaptures(irregexp), | 851 IrregexpNumberOfCaptures(irregexp), |
856 subject, | 852 subject, |
857 previous_index, | 853 previous_index, |
858 offsets.vector(), | 854 offsets.vector(), |
859 offsets.length()); | 855 offsets.length()); |
860 | 856 |
861 if (matches->IsJSArray()) { | 857 if (matches->IsJSArray()) { |
862 SetElement(result, i, matches); | 858 SetElement(result, i, matches); |
863 i++; | 859 i++; |
864 previous_index = offsets.vector()[1]; | 860 previous_index = offsets.vector()[1]; |
865 if (offsets.vector()[0] == offsets.vector()[1]) { | 861 if (offsets.vector()[0] == offsets.vector()[1]) { |
866 previous_index++; | 862 previous_index++; |
867 } | 863 } |
| 864 } else if (matches->IsNull()) { |
| 865 return result; |
| 866 } else { |
| 867 return matches; |
868 } | 868 } |
869 } | 869 } |
870 } while (matches->IsJSArray()); | |
871 | |
872 // If we exited the loop with an exception, throw it. | |
873 if (matches->IsNull()) { | |
874 // Exited loop normally. | |
875 return result; | |
876 } else { | |
877 // Exited loop with the exception in matches. | |
878 return matches; | |
879 } | 870 } |
880 } | 871 } |
881 | 872 |
882 | 873 |
883 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, | 874 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, |
884 int num_captures, | 875 int num_captures, |
885 Handle<String> subject, | 876 Handle<String> subject, |
886 int previous_index, | 877 int previous_index, |
887 int* offsets_vector, | 878 int* offsets_vector, |
888 int offsets_vector_length) { | 879 int offsets_vector_length) { |
| 880 ASSERT(subject->IsFlat(StringShape(*subject))); |
889 bool rc; | 881 bool rc; |
890 | 882 |
891 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); | 883 int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); |
892 | 884 |
893 if (!subject->IsFlat(StringShape(*subject))) { | |
894 FlattenString(subject); | |
895 } | |
896 | |
897 switch (tag) { | 885 switch (tag) { |
898 case RegExpMacroAssembler::kIA32Implementation: { | 886 case RegExpMacroAssembler::kIA32Implementation: { |
899 #ifndef ARM | 887 #ifndef ARM |
900 Handle<Code> code = IrregexpNativeCode(irregexp); | 888 Handle<Code> code = IrregexpNativeCode(irregexp); |
901 | 889 |
902 StringShape shape(*subject); | 890 StringShape shape(*subject); |
903 | 891 |
904 // Character offsets into string. | 892 // Character offsets into string. |
905 int start_offset = previous_index; | 893 int start_offset = previous_index; |
906 int end_offset = subject->length(shape); | 894 int end_offset = subject->length(shape); |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
990 rc = false; | 978 rc = false; |
991 break; | 979 break; |
992 } | 980 } |
993 | 981 |
994 if (!rc) { | 982 if (!rc) { |
995 return Factory::null_value(); | 983 return Factory::null_value(); |
996 } | 984 } |
997 | 985 |
998 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); | 986 Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); |
999 // The captures come in (start, end+1) pairs. | 987 // The captures come in (start, end+1) pairs. |
1000 for (int i = 0; i < 2 * (num_captures+1); i += 2) { | 988 for (int i = 0; i < 2 * (num_captures + 1); i += 2) { |
1001 array->set(i, Smi::FromInt(offsets_vector[i])); | 989 array->set(i, Smi::FromInt(offsets_vector[i])); |
1002 array->set(i+1, Smi::FromInt(offsets_vector[i+1])); | 990 array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); |
1003 } | 991 } |
1004 return Factory::NewJSArrayWithElements(array); | 992 return Factory::NewJSArrayWithElements(array); |
1005 } | 993 } |
1006 | 994 |
1007 | 995 |
1008 // ------------------------------------------------------------------- | 996 // ------------------------------------------------------------------- |
1009 // Implmentation of the Irregexp regular expression engine. | 997 // Implmentation of the Irregexp regular expression engine. |
1010 // | 998 // |
1011 // The Irregexp regular expression engine is intended to be a complete | 999 // The Irregexp regular expression engine is intended to be a complete |
1012 // implementation of ECMAScript regular expressions. It generates either | 1000 // implementation of ECMAScript regular expressions. It generates either |
(...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1337 for (DeferredAction* action = actions_; | 1325 for (DeferredAction* action = actions_; |
1338 action != NULL; | 1326 action != NULL; |
1339 action = action->next()) { | 1327 action = action->next()) { |
1340 affected_registers->Set(action->reg()); | 1328 affected_registers->Set(action->reg()); |
1341 if (action->reg() > max_register) max_register = action->reg(); | 1329 if (action->reg() > max_register) max_register = action->reg(); |
1342 } | 1330 } |
1343 return max_register; | 1331 return max_register; |
1344 } | 1332 } |
1345 | 1333 |
1346 | 1334 |
1347 void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro, | 1335 void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* assembler, |
1348 int max_register, | 1336 int max_register, |
1349 OutSet& affected_registers) { | 1337 OutSet& affected_registers) { |
1350 for (int reg = 0; reg <= max_register; reg++) { | 1338 for (int reg = 0; reg <= max_register; reg++) { |
1351 if (affected_registers.Get(reg)) macro->PushRegister(reg); | 1339 if (affected_registers.Get(reg)) assembler->PushRegister(reg); |
1352 } | 1340 } |
1353 } | 1341 } |
1354 | 1342 |
1355 | 1343 |
1356 void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro, | 1344 void GenerationVariant::RestoreAffectedRegisters( |
1357 int max_register, | 1345 RegExpMacroAssembler* assembler, |
1358 OutSet& affected_registers) { | 1346 int max_register, |
| 1347 OutSet& affected_registers) { |
1359 for (int reg = max_register; reg >= 0; reg--) { | 1348 for (int reg = max_register; reg >= 0; reg--) { |
1360 if (affected_registers.Get(reg)) macro->PopRegister(reg); | 1349 if (affected_registers.Get(reg)) assembler->PopRegister(reg); |
1361 } | 1350 } |
1362 } | 1351 } |
1363 | 1352 |
1364 | 1353 |
1365 void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro, | 1354 void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* assembler, |
1366 int max_register, | 1355 int max_register, |
1367 OutSet& affected_registers) { | 1356 OutSet& affected_registers) { |
1368 for (int reg = 0; reg <= max_register; reg++) { | 1357 for (int reg = 0; reg <= max_register; reg++) { |
1369 if (!affected_registers.Get(reg)) { | 1358 if (!affected_registers.Get(reg)) { |
1370 continue; | 1359 continue; |
1371 } | 1360 } |
1372 int value = 0; | 1361 int value = 0; |
1373 bool absolute = false; | 1362 bool absolute = false; |
1374 int store_position = -1; | 1363 int store_position = -1; |
1375 // This is a little tricky because we are scanning the actions in reverse | 1364 // This is a little tricky because we are scanning the actions in reverse |
(...skipping 27 matching lines...) Expand all Loading... |
1403 ASSERT_EQ(value, 0); | 1392 ASSERT_EQ(value, 0); |
1404 break; | 1393 break; |
1405 } | 1394 } |
1406 default: | 1395 default: |
1407 UNREACHABLE(); | 1396 UNREACHABLE(); |
1408 break; | 1397 break; |
1409 } | 1398 } |
1410 } | 1399 } |
1411 } | 1400 } |
1412 if (store_position != -1) { | 1401 if (store_position != -1) { |
1413 macro->WriteCurrentPositionToRegister(reg, store_position); | 1402 assembler->WriteCurrentPositionToRegister(reg, store_position); |
1414 } else { | 1403 } else { |
1415 if (absolute) { | 1404 if (absolute) { |
1416 macro->SetRegister(reg, value); | 1405 assembler->SetRegister(reg, value); |
1417 } else { | 1406 } else { |
1418 if (value != 0) { | 1407 if (value != 0) { |
1419 macro->AdvanceRegister(reg, value); | 1408 assembler->AdvanceRegister(reg, value); |
1420 } | 1409 } |
1421 } | 1410 } |
1422 } | 1411 } |
1423 } | 1412 } |
1424 } | 1413 } |
1425 | 1414 |
1426 | 1415 |
1427 // This is called as we come into a loop choice node and some other tricky | 1416 // This is called as we come into a loop choice node and some other tricky |
1428 // nodes. It normalises the state of the code generator to ensure we can | 1417 // nodes. It normalises the state of the code generator to ensure we can |
1429 // generate generic code. | 1418 // generate generic code. |
1430 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | 1419 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { |
1431 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1420 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
1432 | 1421 |
1433 ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL); | 1422 ASSERT(actions_ != NULL || |
| 1423 cp_offset_ != 0 || |
| 1424 backtrack() != NULL || |
| 1425 characters_preloaded_ != 0 || |
| 1426 quick_check_performed_.characters() != 0); |
1434 | 1427 |
1435 if (actions_ == NULL && backtrack() == NULL) { | 1428 if (actions_ == NULL && backtrack() == NULL) { |
1436 // Here we just have some deferred cp advances to fix and we are back to | 1429 // Here we just have some deferred cp advances to fix and we are back to |
1437 // a normal situation. | 1430 // a normal situation. We may also have to forget some information gained |
1438 macro->AdvanceCurrentPosition(cp_offset_); | 1431 // through a quick check that was already performed. |
| 1432 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); |
1439 // Create a new trivial state and generate the node with that. | 1433 // Create a new trivial state and generate the node with that. |
1440 GenerationVariant new_state; | 1434 GenerationVariant new_state; |
1441 return successor->Emit(compiler, &new_state); | 1435 return successor->Emit(compiler, &new_state); |
1442 } | 1436 } |
1443 | 1437 |
1444 // Generate deferred actions here along with code to undo them again. | 1438 // Generate deferred actions here along with code to undo them again. |
1445 OutSet affected_registers; | 1439 OutSet affected_registers; |
1446 int max_register = FindAffectedRegisters(&affected_registers); | 1440 int max_register = FindAffectedRegisters(&affected_registers); |
1447 PushAffectedRegisters(macro, max_register, affected_registers); | 1441 PushAffectedRegisters(assembler, max_register, affected_registers); |
1448 PerformDeferredActions(macro, max_register, affected_registers); | 1442 PerformDeferredActions(assembler, max_register, affected_registers); |
1449 if (backtrack() != NULL) { | 1443 if (backtrack() != NULL) { |
1450 // Here we have a concrete backtrack location. These are set up by choice | 1444 // Here we have a concrete backtrack location. These are set up by choice |
1451 // nodes and so they indicate that we have a deferred save of the current | 1445 // nodes and so they indicate that we have a deferred save of the current |
1452 // position which we may need to emit here. | 1446 // position which we may need to emit here. |
1453 macro->PushCurrentPosition(); | 1447 assembler->PushCurrentPosition(); |
1454 } | 1448 } |
1455 if (cp_offset_ != 0) { | 1449 if (cp_offset_ != 0) { |
1456 macro->AdvanceCurrentPosition(cp_offset_); | 1450 assembler->AdvanceCurrentPosition(cp_offset_); |
1457 } | 1451 } |
1458 | 1452 |
1459 // Create a new trivial state and generate the node with that. | 1453 // Create a new trivial state and generate the node with that. |
1460 Label undo; | 1454 Label undo; |
1461 macro->PushBacktrack(&undo); | 1455 assembler->PushBacktrack(&undo); |
1462 GenerationVariant new_state; | 1456 GenerationVariant new_state; |
1463 bool ok = successor->Emit(compiler, &new_state); | 1457 bool ok = successor->Emit(compiler, &new_state); |
1464 | 1458 |
1465 // On backtrack we need to restore state. | 1459 // On backtrack we need to restore state. |
1466 macro->Bind(&undo); | 1460 assembler->Bind(&undo); |
1467 if (!ok) return false; | 1461 if (!ok) return false; |
1468 if (backtrack() != NULL) { | 1462 if (backtrack() != NULL) { |
1469 macro->PopCurrentPosition(); | 1463 assembler->PopCurrentPosition(); |
1470 } | 1464 } |
1471 RestoreAffectedRegisters(macro, max_register, affected_registers); | 1465 RestoreAffectedRegisters(assembler, max_register, affected_registers); |
1472 if (backtrack() == NULL) { | 1466 if (backtrack() == NULL) { |
1473 macro->Backtrack(); | 1467 assembler->Backtrack(); |
1474 } else { | 1468 } else { |
1475 macro->GoTo(backtrack()); | 1469 assembler->GoTo(backtrack()); |
1476 } | 1470 } |
1477 | 1471 |
1478 return true; | 1472 return true; |
1479 } | 1473 } |
1480 | 1474 |
1481 | 1475 |
1482 void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro, | 1476 void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler, |
1483 GenerationVariant* variant) { | 1477 GenerationVariant* variant) { |
1484 if (info()->at_end) { | 1478 if (info()->at_end) { |
1485 Label succeed; | 1479 Label succeed; |
1486 // LoadCurrentCharacter will go to the label if we are at the end of the | 1480 // LoadCurrentCharacter will go to the label if we are at the end of the |
1487 // input string. | 1481 // input string. |
1488 macro->LoadCurrentCharacter(0, &succeed); | 1482 assembler->LoadCurrentCharacter(0, &succeed); |
1489 macro->GoTo(variant->backtrack()); | 1483 assembler->GoTo(variant->backtrack()); |
1490 macro->Bind(&succeed); | 1484 assembler->Bind(&succeed); |
1491 } | 1485 } |
1492 } | 1486 } |
1493 | 1487 |
1494 | 1488 |
1495 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, | 1489 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, |
1496 GenerationVariant* variant) { | 1490 GenerationVariant* variant) { |
1497 if (!variant->is_trivial()) { | 1491 if (!variant->is_trivial()) { |
1498 return variant->Flush(compiler, this); | 1492 return variant->Flush(compiler, this); |
1499 } | 1493 } |
1500 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1494 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
1501 if (!label()->is_bound()) { | 1495 if (!label()->is_bound()) { |
1502 macro->Bind(label()); | 1496 assembler->Bind(label()); |
1503 } | 1497 } |
1504 EmitInfoChecks(macro, variant); | 1498 EmitInfoChecks(assembler, variant); |
1505 macro->ReadCurrentPositionFromRegister(current_position_register_); | 1499 assembler->ReadCurrentPositionFromRegister(current_position_register_); |
1506 macro->ReadStackPointerFromRegister(stack_pointer_register_); | 1500 assembler->ReadStackPointerFromRegister(stack_pointer_register_); |
1507 // Now that we have unwound the stack we find at the top of the stack the | 1501 // Now that we have unwound the stack we find at the top of the stack the |
1508 // backtrack that the BeginSubmatch node got. | 1502 // backtrack that the BeginSubmatch node got. |
1509 macro->Backtrack(); | 1503 assembler->Backtrack(); |
1510 return true; | 1504 return true; |
1511 } | 1505 } |
1512 | 1506 |
1513 | 1507 |
1514 bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 1508 bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { |
1515 if (!variant->is_trivial()) { | 1509 if (!variant->is_trivial()) { |
1516 return variant->Flush(compiler, this); | 1510 return variant->Flush(compiler, this); |
1517 } | 1511 } |
1518 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1512 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
1519 if (!label()->is_bound()) { | 1513 if (!label()->is_bound()) { |
1520 macro->Bind(label()); | 1514 assembler->Bind(label()); |
1521 } | 1515 } |
1522 switch (action_) { | 1516 switch (action_) { |
1523 case ACCEPT: | 1517 case ACCEPT: |
1524 EmitInfoChecks(macro, variant); | 1518 EmitInfoChecks(assembler, variant); |
1525 macro->Succeed(); | 1519 assembler->Succeed(); |
1526 return true; | 1520 return true; |
1527 case BACKTRACK: | 1521 case BACKTRACK: |
1528 ASSERT(!info()->at_end); | 1522 ASSERT(!info()->at_end); |
1529 macro->GoTo(variant->backtrack()); | 1523 assembler->GoTo(variant->backtrack()); |
1530 return true; | 1524 return true; |
1531 case NEGATIVE_SUBMATCH_SUCCESS: | 1525 case NEGATIVE_SUBMATCH_SUCCESS: |
1532 // This case is handled in a different virtual method. | 1526 // This case is handled in a different virtual method. |
1533 UNREACHABLE(); | 1527 UNREACHABLE(); |
1534 } | 1528 } |
1535 UNIMPLEMENTED(); | 1529 UNIMPLEMENTED(); |
1536 return false; | 1530 return false; |
1537 } | 1531 } |
1538 | 1532 |
1539 | 1533 |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1622 variant->backtrack()); | 1616 variant->backtrack()); |
1623 break; | 1617 break; |
1624 } | 1618 } |
1625 } | 1619 } |
1626 | 1620 |
1627 | 1621 |
1628 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | 1622 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; |
1629 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | 1623 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; |
1630 | 1624 |
1631 | 1625 |
1632 static inline void EmitAtomNonLetters( | 1626 // Only emits non-letters (things that don't have case). Only used for case |
| 1627 // independent matches. |
| 1628 static inline bool EmitAtomNonLetter( |
1633 RegExpMacroAssembler* macro_assembler, | 1629 RegExpMacroAssembler* macro_assembler, |
1634 TextElement elm, | 1630 uc16 c, |
1635 Vector<const uc16> quarks, | |
1636 Label* on_failure, | 1631 Label* on_failure, |
1637 int cp_offset, | 1632 int cp_offset, |
1638 bool check_offset) { | 1633 bool check, |
| 1634 bool preloaded) { |
1639 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1635 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1640 // It is vital that this loop is backwards due to the unchecked character | 1636 int length = uncanonicalize.get(c, '\0', chars); |
1641 // load below. | 1637 bool checked = false; |
1642 for (int i = quarks.length() - 1; i >= 0; i--) { | 1638 if (length <= 1) { |
1643 uc16 c = quarks[i]; | 1639 if (!preloaded) { |
1644 int length = uncanonicalize.get(c, '\0', chars); | 1640 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1645 if (length <= 1) { | 1641 checked = check; |
1646 if (check_offset && i == quarks.length() - 1) { | |
1647 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | |
1648 } else { | |
1649 // Here we don't need to check against the end of the input string | |
1650 // since this character lies before a character that matched. | |
1651 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); | |
1652 } | |
1653 macro_assembler->CheckNotCharacter(c, on_failure); | |
1654 } | 1642 } |
| 1643 macro_assembler->CheckNotCharacter(c, on_failure); |
1655 } | 1644 } |
| 1645 return checked; |
1656 } | 1646 } |
1657 | 1647 |
1658 | 1648 |
1659 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 1649 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
1660 uc16 c1, | 1650 uc16 c1, |
1661 uc16 c2, | 1651 uc16 c2, |
1662 Label* on_failure) { | 1652 Label* on_failure) { |
1663 uc16 exor = c1 ^ c2; | 1653 uc16 exor = c1 ^ c2; |
1664 // Check whether exor has only one bit set. | 1654 // Check whether exor has only one bit set. |
1665 if (((exor - 1) & exor) == 0) { | 1655 if (((exor - 1) & exor) == 0) { |
1666 // If c1 and c2 differ only by one bit. | 1656 // If c1 and c2 differ only by one bit. |
1667 // Ecma262UnCanonicalize always gives the highest number last. | 1657 // Ecma262UnCanonicalize always gives the highest number last. |
1668 ASSERT(c2 > c1); | 1658 ASSERT(c2 > c1); |
1669 macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure); | 1659 uc16 mask = String::kMaxUC16CharCode ^ exor; |
| 1660 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); |
1670 return true; | 1661 return true; |
1671 } | 1662 } |
1672 ASSERT(c2 > c1); | 1663 ASSERT(c2 > c1); |
1673 uc16 diff = c2 - c1; | 1664 uc16 diff = c2 - c1; |
1674 if (((diff - 1) & diff) == 0 && c1 >= diff) { | 1665 if (((diff - 1) & diff) == 0 && c1 >= diff) { |
1675 // If the characters differ by 2^n but don't differ by one bit then | 1666 // If the characters differ by 2^n but don't differ by one bit then |
1676 // subtract the difference from the found character, then do the or | 1667 // subtract the difference from the found character, then do the or |
1677 // trick. We avoid the theoretical case where negative numbers are | 1668 // trick. We avoid the theoretical case where negative numbers are |
1678 // involved in order to simplify code generation. | 1669 // involved in order to simplify code generation. |
1679 macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff, | 1670 uc16 mask = String::kMaxUC16CharCode ^ diff; |
1680 diff, | 1671 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, |
1681 on_failure); | 1672 diff, |
| 1673 mask, |
| 1674 on_failure); |
1682 return true; | 1675 return true; |
1683 } | 1676 } |
1684 return false; | 1677 return false; |
1685 } | 1678 } |
1686 | 1679 |
1687 | 1680 |
1688 static inline void EmitAtomLetters( | 1681 // Only emits letters (things that have case). Only used for case independent |
| 1682 // matches. |
| 1683 static inline bool EmitAtomLetter( |
1689 RegExpMacroAssembler* macro_assembler, | 1684 RegExpMacroAssembler* macro_assembler, |
1690 TextElement elm, | 1685 uc16 c, |
1691 Vector<const uc16> quarks, | |
1692 Label* on_failure, | 1686 Label* on_failure, |
1693 int cp_offset, | 1687 int cp_offset, |
1694 bool check_offset) { | 1688 bool check, |
| 1689 bool preloaded) { |
1695 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1690 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
1696 // It is vital that this loop is backwards due to the unchecked character | 1691 int length = uncanonicalize.get(c, '\0', chars); |
1697 // load below. | 1692 if (length <= 1) return false; |
1698 for (int i = quarks.length() - 1; i >= 0; i--) { | 1693 // We may not need to check against the end of the input string |
1699 uc16 c = quarks[i]; | 1694 // if this character lies before a character that matched. |
1700 int length = uncanonicalize.get(c, '\0', chars); | 1695 if (!preloaded) { |
1701 if (length <= 1) continue; | 1696 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
1702 if (check_offset && i == quarks.length() - 1) { | 1697 } |
1703 macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | 1698 Label ok; |
1704 } else { | 1699 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
1705 // Here we don't need to check against the end of the input string | 1700 switch (length) { |
1706 // since this character lies before a character that matched. | 1701 case 2: { |
1707 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); | 1702 if (ShortCutEmitCharacterPair(macro_assembler, |
| 1703 chars[0], |
| 1704 chars[1], |
| 1705 on_failure)) { |
| 1706 } else { |
| 1707 macro_assembler->CheckCharacter(chars[0], &ok); |
| 1708 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
| 1709 macro_assembler->Bind(&ok); |
| 1710 } |
| 1711 break; |
1708 } | 1712 } |
1709 Label ok; | 1713 case 4: |
1710 ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1714 macro_assembler->CheckCharacter(chars[3], &ok); |
1711 switch (length) { | 1715 // Fall through! |
1712 case 2: { | 1716 case 3: |
1713 if (ShortCutEmitCharacterPair(macro_assembler, | 1717 macro_assembler->CheckCharacter(chars[0], &ok); |
1714 chars[0], | 1718 macro_assembler->CheckCharacter(chars[1], &ok); |
1715 chars[1], | 1719 macro_assembler->CheckNotCharacter(chars[2], on_failure); |
1716 on_failure)) { | 1720 macro_assembler->Bind(&ok); |
1717 } else { | 1721 break; |
1718 macro_assembler->CheckCharacter(chars[0], &ok); | 1722 default: |
1719 macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1723 UNREACHABLE(); |
1720 macro_assembler->Bind(&ok); | 1724 break; |
1721 } | |
1722 break; | |
1723 } | |
1724 case 4: | |
1725 macro_assembler->CheckCharacter(chars[3], &ok); | |
1726 // Fall through! | |
1727 case 3: | |
1728 macro_assembler->CheckCharacter(chars[0], &ok); | |
1729 macro_assembler->CheckCharacter(chars[1], &ok); | |
1730 macro_assembler->CheckNotCharacter(chars[2], on_failure); | |
1731 macro_assembler->Bind(&ok); | |
1732 break; | |
1733 default: | |
1734 UNREACHABLE(); | |
1735 break; | |
1736 } | |
1737 } | 1725 } |
| 1726 return true; |
1738 } | 1727 } |
1739 | 1728 |
1740 | 1729 |
1741 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1730 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
1742 RegExpCharacterClass* cc, | 1731 RegExpCharacterClass* cc, |
1743 int cp_offset, | 1732 int cp_offset, |
1744 Label* on_failure, | 1733 Label* on_failure, |
1745 bool check_offset, | 1734 bool check_offset, |
1746 bool ascii) { | 1735 bool ascii, |
| 1736 bool preloaded) { |
1747 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1737 ZoneList<CharacterRange>* ranges = cc->ranges(); |
1748 int max_char; | 1738 int max_char; |
1749 if (ascii) { | 1739 if (ascii) { |
1750 max_char = String::kMaxAsciiCharCode; | 1740 max_char = String::kMaxAsciiCharCode; |
1751 } else { | 1741 } else { |
1752 max_char = String::kMaxUC16CharCode; | 1742 max_char = String::kMaxUC16CharCode; |
1753 } | 1743 } |
1754 | 1744 |
1755 Label success; | 1745 Label success; |
1756 | 1746 |
(...skipping 25 matching lines...) Expand all Loading... |
1782 ranges->at(0).IsEverything(max_char)) { | 1772 ranges->at(0).IsEverything(max_char)) { |
1783 // This is a common case hit by non-anchored expressions. | 1773 // This is a common case hit by non-anchored expressions. |
1784 // TODO(erikcorry): We should have a macro assembler instruction that just | 1774 // TODO(erikcorry): We should have a macro assembler instruction that just |
1785 // checks for end of string without loading the character. | 1775 // checks for end of string without loading the character. |
1786 if (check_offset) { | 1776 if (check_offset) { |
1787 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1777 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); |
1788 } | 1778 } |
1789 return; | 1779 return; |
1790 } | 1780 } |
1791 | 1781 |
1792 if (check_offset) { | 1782 if (!preloaded) { |
1793 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1783 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); |
1794 } else { | |
1795 // Here we don't need to check against the end of the input string | |
1796 // since this character lies before a character that matched. | |
1797 macro_assembler->LoadCurrentCharacterUnchecked(cp_offset); | |
1798 } | 1784 } |
1799 | 1785 |
1800 for (int i = 0; i <= last_valid_range; i++) { | 1786 for (int i = 0; i < last_valid_range; i++) { |
1801 CharacterRange& range = ranges->at(i); | 1787 CharacterRange& range = ranges->at(i); |
1802 Label next_range; | 1788 Label next_range; |
1803 uc16 from = range.from(); | 1789 uc16 from = range.from(); |
1804 uc16 to = range.to(); | 1790 uc16 to = range.to(); |
1805 if (from > max_char) { | 1791 if (from > max_char) { |
1806 continue; | 1792 continue; |
1807 } | 1793 } |
1808 if (to > max_char) to = max_char; | 1794 if (to > max_char) to = max_char; |
1809 if (to == from) { | 1795 if (to == from) { |
1810 macro_assembler->CheckCharacter(to, char_is_in_class); | 1796 macro_assembler->CheckCharacter(to, char_is_in_class); |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1851 } else { | 1837 } else { |
1852 if (cc->is_negated()) { | 1838 if (cc->is_negated()) { |
1853 macro_assembler->GoTo(on_failure); | 1839 macro_assembler->GoTo(on_failure); |
1854 } | 1840 } |
1855 } | 1841 } |
1856 } | 1842 } |
1857 macro_assembler->Bind(&success); | 1843 macro_assembler->Bind(&success); |
1858 } | 1844 } |
1859 | 1845 |
1860 | 1846 |
| 1847 RegExpNode::~RegExpNode() { |
| 1848 } |
| 1849 |
| 1850 |
1861 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 1851 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, |
1862 GenerationVariant* variant) { | 1852 GenerationVariant* variant) { |
1863 // TODO(erikcorry): Implement support. | 1853 // TODO(erikcorry): Implement support. |
1864 if (info_.follows_word_interest || | 1854 if (info_.follows_word_interest || |
1865 info_.follows_newline_interest || | 1855 info_.follows_newline_interest || |
1866 info_.follows_start_interest) { | 1856 info_.follows_start_interest) { |
1867 return FAIL; | 1857 return FAIL; |
1868 } | 1858 } |
1869 | 1859 |
1870 // If we are generating a greedy loop then don't stop and don't reuse code. | 1860 // If we are generating a greedy loop then don't stop and don't reuse code. |
(...skipping 30 matching lines...) Expand all Loading... |
1901 } | 1891 } |
1902 | 1892 |
1903 // If we get here there have been too many variants generated or recursion | 1893 // If we get here there have been too many variants generated or recursion |
1904 // is too deep. Time to switch to a generic version. The code for | 1894 // is too deep. Time to switch to a generic version. The code for |
1905 // generic versions above can handle deep recursion properly. | 1895 // generic versions above can handle deep recursion properly. |
1906 bool ok = variant->Flush(compiler, this); | 1896 bool ok = variant->Flush(compiler, this); |
1907 return ok ? DONE : FAIL; | 1897 return ok ? DONE : FAIL; |
1908 } | 1898 } |
1909 | 1899 |
1910 | 1900 |
| 1901 int ActionNode::EatsAtLeast(int recursion_depth) { |
| 1902 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; |
| 1903 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! |
| 1904 return on_success()->EatsAtLeast(recursion_depth + 1); |
| 1905 } |
| 1906 |
| 1907 |
| 1908 int TextNode::EatsAtLeast(int recursion_depth) { |
| 1909 int answer = Length(); |
| 1910 if (answer >= 4) return answer; |
| 1911 if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer; |
| 1912 return answer + on_success()->EatsAtLeast(recursion_depth + 1); |
| 1913 } |
| 1914 |
| 1915 |
| 1916 |
| 1917 int ChoiceNode::EatsAtLeastHelper(int recursion_depth, int start_from_node) { |
| 1918 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; |
| 1919 int min = 100; |
| 1920 int choice_count = alternatives_->length(); |
| 1921 for (int i = start_from_node; i < choice_count; i++) { |
| 1922 RegExpNode* node = alternatives_->at(i).node(); |
| 1923 int node_eats_at_least = node->EatsAtLeast(recursion_depth + 1); |
| 1924 if (node_eats_at_least < min) min = node_eats_at_least; |
| 1925 } |
| 1926 return min; |
| 1927 } |
| 1928 |
| 1929 |
| 1930 int LoopChoiceNode::EatsAtLeast(int recursion_depth) { |
| 1931 return 0; |
| 1932 } |
| 1933 |
| 1934 |
| 1935 int ChoiceNode::EatsAtLeast(int recursion_depth) { |
| 1936 return EatsAtLeastHelper(recursion_depth, 0); |
| 1937 } |
| 1938 |
| 1939 |
| 1940 // Takes the left-most 1-bit and smears it out, setting all bits to its right. |
| 1941 static inline uint32_t SmearBitsRight(uint32_t v) { |
| 1942 v |= v >> 1; |
| 1943 v |= v >> 2; |
| 1944 v |= v >> 4; |
| 1945 v |= v >> 8; |
| 1946 v |= v >> 16; |
| 1947 return v; |
| 1948 } |
| 1949 |
| 1950 |
| 1951 bool QuickCheckDetails::Rationalize(bool asc) { |
| 1952 bool found_useful_op = false; |
| 1953 uint32_t char_mask; |
| 1954 if (asc) { |
| 1955 char_mask = String::kMaxAsciiCharCode; |
| 1956 } else { |
| 1957 char_mask = String::kMaxUC16CharCode; |
| 1958 } |
| 1959 mask_ = 0; |
| 1960 value_ = 0; |
| 1961 int char_shift = 0; |
| 1962 for (int i = 0; i < characters_; i++) { |
| 1963 Position* pos = &positions_[i]; |
| 1964 if ((pos->mask & String::kMaxAsciiCharCode) != 0) { |
| 1965 found_useful_op = true; |
| 1966 } |
| 1967 mask_ |= (pos->mask & char_mask) << char_shift; |
| 1968 value_ |= (pos->value & char_mask) << char_shift; |
| 1969 char_shift += asc ? 8 : 16; |
| 1970 } |
| 1971 return found_useful_op; |
| 1972 } |
| 1973 |
| 1974 |
| 1975 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, |
| 1976 GenerationVariant* variant, |
| 1977 bool preload_has_checked_bounds, |
| 1978 Label* on_possible_success, |
| 1979 QuickCheckDetails* details, |
| 1980 bool fall_through_on_failure) { |
| 1981 if (details->characters() == 0) return false; |
| 1982 GetQuickCheckDetails(details, compiler, 0); |
| 1983 if (!details->Rationalize(compiler->ascii())) return false; |
| 1984 uint32_t mask = details->mask(); |
| 1985 uint32_t value = details->value(); |
| 1986 |
| 1987 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 1988 |
| 1989 if (variant->characters_preloaded() != details->characters()) { |
| 1990 assembler->LoadCurrentCharacter(variant->cp_offset(), |
| 1991 variant->backtrack(), |
| 1992 !preload_has_checked_bounds, |
| 1993 details->characters()); |
| 1994 } |
| 1995 |
| 1996 |
| 1997 bool need_mask = true; |
| 1998 |
| 1999 if (details->characters() == 1) { |
| 2000 // If number of characters preloaded is 1 then we used a byte or 16 bit |
| 2001 // load so the value is already masked down. |
| 2002 uint32_t char_mask; |
| 2003 if (compiler->ascii()) { |
| 2004 char_mask = String::kMaxAsciiCharCode; |
| 2005 } else { |
| 2006 char_mask = String::kMaxUC16CharCode; |
| 2007 } |
| 2008 if ((mask & char_mask) == char_mask) need_mask = false; |
| 2009 } else { |
| 2010 // For 2-character preloads in ASCII mode we also use a 16 bit load with |
| 2011 // zero extend. |
| 2012 if (details->characters() == 2 && compiler->ascii()) { |
| 2013 if ((mask & 0xffff) == 0xffff) need_mask = false; |
| 2014 } else { |
| 2015 if (mask == 0xffffffff) need_mask = false; |
| 2016 } |
| 2017 } |
| 2018 |
| 2019 if (fall_through_on_failure) { |
| 2020 if (need_mask) { |
| 2021 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); |
| 2022 } else { |
| 2023 assembler->CheckCharacter(value, on_possible_success); |
| 2024 } |
| 2025 } else { |
| 2026 if (need_mask) { |
| 2027 assembler->CheckNotCharacterAfterAnd(value, mask, variant->backtrack()); |
| 2028 } else { |
| 2029 assembler->CheckNotCharacter(value, variant->backtrack()); |
| 2030 } |
| 2031 } |
| 2032 return true; |
| 2033 } |
| 2034 |
| 2035 |
| 2036 // Here is the meat of GetQuickCheckDetails (see also the comment on the |
| 2037 // super-class in the .h file). |
| 2038 // |
| 2039 // We iterate along the text object, building up for each character a |
| 2040 // mask and value that can be used to test for a quick failure to match. |
| 2041 // The masks and values for the positions will be combined into a single |
| 2042 // machine word for the current character width in order to be used in |
| 2043 // generating a quick check. |
| 2044 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2045 RegExpCompiler* compiler, |
| 2046 int characters_filled_in) { |
| 2047 ASSERT(characters_filled_in < details->characters()); |
| 2048 int characters = details->characters(); |
| 2049 int char_mask; |
| 2050 int char_shift; |
| 2051 if (compiler->ascii()) { |
| 2052 char_mask = String::kMaxAsciiCharCode; |
| 2053 char_shift = 8; |
| 2054 } else { |
| 2055 char_mask = String::kMaxUC16CharCode; |
| 2056 char_shift = 16; |
| 2057 } |
| 2058 for (int k = 0; k < elms_->length(); k++) { |
| 2059 TextElement elm = elms_->at(k); |
| 2060 if (elm.type == TextElement::ATOM) { |
| 2061 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 2062 for (int i = 0; i < characters && i < quarks.length(); i++) { |
| 2063 QuickCheckDetails::Position* pos = |
| 2064 details->positions(characters_filled_in); |
| 2065 if (compiler->ignore_case()) { |
| 2066 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 2067 uc16 c = quarks[i]; |
| 2068 int length = uncanonicalize.get(c, '\0', chars); |
| 2069 if (length < 2) { |
| 2070 // This letter has no case equivalents, so it's nice and simple |
| 2071 // and the mask-compare will determine definitely whether we have |
| 2072 // a match at this character position. |
| 2073 pos->mask = char_mask; |
| 2074 pos->value = c; |
| 2075 pos->determines_perfectly = true; |
| 2076 } else { |
| 2077 uint32_t common_bits = char_mask; |
| 2078 uint32_t bits = chars[0]; |
| 2079 for (int j = 1; j < length; j++) { |
| 2080 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits); |
| 2081 common_bits ^= differing_bits; |
| 2082 bits &= common_bits; |
| 2083 } |
| 2084 // If length is 2 and common bits has only one zero in it then |
| 2085 // our mask and compare instruction will determine definitely |
| 2086 // whether we have a match at this character position. Otherwise |
| 2087 // it can only be an approximate check. |
| 2088 uint32_t one_zero = (common_bits | ~char_mask); |
| 2089 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) { |
| 2090 pos->determines_perfectly = true; |
| 2091 } |
| 2092 pos->mask = common_bits; |
| 2093 pos->value = bits; |
| 2094 } |
| 2095 } else { |
| 2096 // Don't ignore case. Nice simple case where the mask-compare will |
| 2097 // determine definitely whether we have a match at this character |
| 2098 // position. |
| 2099 pos->mask = char_mask; |
| 2100 pos->value = quarks[i]; |
| 2101 pos->determines_perfectly = true; |
| 2102 } |
| 2103 characters_filled_in++; |
| 2104 ASSERT(characters_filled_in <= details->characters()); |
| 2105 if (characters_filled_in == details->characters()) { |
| 2106 return; |
| 2107 } |
| 2108 } |
| 2109 } else { |
| 2110 QuickCheckDetails::Position* pos = |
| 2111 details->positions(characters_filled_in); |
| 2112 RegExpCharacterClass* tree = elm.data.u_char_class; |
| 2113 ZoneList<CharacterRange>* ranges = tree->ranges(); |
| 2114 CharacterRange range = ranges->at(0); |
| 2115 if (tree->is_negated()) { |
| 2116 // A quick check uses multi-character mask and compare. There is no |
| 2117 // useful way to incorporate a negative char class into this scheme |
| 2118 // so we just conservatively create a mask and value that will always |
| 2119 // succeed. |
| 2120 pos->mask = 0; |
| 2121 pos->value = 0; |
| 2122 } else { |
| 2123 uint32_t differing_bits = (range.from() ^ range.to()); |
| 2124 // A mask and compare is only perfect if the differing bits form a |
| 2125 // number like 00011111 with one single block of trailing 1s. |
| 2126 if ((differing_bits & (differing_bits + 1)) == 0) { |
| 2127 pos->determines_perfectly = true; |
| 2128 } |
| 2129 uint32_t common_bits = ~SmearBitsRight(differing_bits); |
| 2130 uint32_t bits = (range.from() & common_bits); |
| 2131 for (int i = 1; i < ranges->length(); i++) { |
| 2132 // Here we are combining more ranges into the mask and compare |
| 2133 // value. With each new range the mask becomes more sparse and |
| 2134 // so the chances of a false positive rise. A character class |
| 2135 // with multiple ranges is assumed never to be equivalent to a |
| 2136 // mask and compare operation. |
| 2137 pos->determines_perfectly = false; |
| 2138 CharacterRange range = ranges->at(i); |
| 2139 uint32_t new_common_bits = (range.from() ^ range.to()); |
| 2140 new_common_bits = ~SmearBitsRight(new_common_bits); |
| 2141 common_bits &= new_common_bits; |
| 2142 bits &= new_common_bits; |
| 2143 uint32_t differing_bits = (range.from() & common_bits) ^ bits; |
| 2144 common_bits ^= differing_bits; |
| 2145 bits &= common_bits; |
| 2146 } |
| 2147 pos->mask = common_bits; |
| 2148 pos->value = bits; |
| 2149 } |
| 2150 characters_filled_in++; |
| 2151 ASSERT(characters_filled_in <= details->characters()); |
| 2152 if (characters_filled_in == details->characters()) { |
| 2153 return; |
| 2154 } |
| 2155 } |
| 2156 } |
| 2157 ASSERT(characters_filled_in != details->characters()); |
| 2158 on_success()-> GetQuickCheckDetails(details, compiler, characters_filled_in); |
| 2159 } |
| 2160 |
| 2161 |
| 2162 void QuickCheckDetails::Clear() { |
| 2163 for (int i = 0; i < characters_; i++) { |
| 2164 positions_[i].mask = 0; |
| 2165 positions_[i].value = 0; |
| 2166 positions_[i].determines_perfectly = false; |
| 2167 } |
| 2168 characters_ = 0; |
| 2169 } |
| 2170 |
| 2171 |
| 2172 void QuickCheckDetails::Advance(int by, bool ascii) { |
| 2173 ASSERT(by > 0); |
| 2174 if (by >= characters_) { |
| 2175 Clear(); |
| 2176 return; |
| 2177 } |
| 2178 for (int i = 0; i < characters_ - by; i++) { |
| 2179 positions_[i] = positions_[by + i]; |
| 2180 } |
| 2181 for (int i = characters_ - by; i < characters_; i++) { |
| 2182 positions_[i].mask = 0; |
| 2183 positions_[i].value = 0; |
| 2184 positions_[i].determines_perfectly = false; |
| 2185 } |
| 2186 characters_ -= by; |
| 2187 // We could change mask_ and value_ here but we would never advance unless |
| 2188 // they had already been used in a check and they won't be used again because |
| 2189 // it would gain us nothing. So there's no point. |
| 2190 } |
| 2191 |
| 2192 |
| 2193 void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) { |
| 2194 ASSERT(characters_ == other->characters_); |
| 2195 for (int i = from_index; i < characters_; i++) { |
| 2196 QuickCheckDetails::Position* pos = positions(i); |
| 2197 QuickCheckDetails::Position* other_pos = other->positions(i); |
| 2198 if (pos->mask != other_pos->mask || |
| 2199 pos->value != other_pos->value || |
| 2200 !other_pos->determines_perfectly) { |
| 2201 // Our mask-compare operation will be approximate unless we have the |
| 2202 // exact same operation on both sides of the alternation. |
| 2203 pos->determines_perfectly = false; |
| 2204 } |
| 2205 pos->mask &= other_pos->mask; |
| 2206 pos->value &= pos->mask; |
| 2207 other_pos->value &= pos->mask; |
| 2208 uc16 differing_bits = (pos->value ^ other_pos->value); |
| 2209 pos->mask &= ~differing_bits; |
| 2210 pos->value &= pos->mask; |
| 2211 } |
| 2212 } |
| 2213 |
| 2214 |
| 2215 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2216 RegExpCompiler* compiler, |
| 2217 int characters_filled_in) { |
| 2218 int choice_count = alternatives_->length(); |
| 2219 ASSERT(choice_count > 0); |
| 2220 alternatives_->at(0).node()->GetQuickCheckDetails(details, |
| 2221 compiler, |
| 2222 characters_filled_in); |
| 2223 for (int i = 1; i < choice_count; i++) { |
| 2224 QuickCheckDetails new_details(details->characters()); |
| 2225 RegExpNode* node = alternatives_->at(i).node(); |
| 2226 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in); |
| 2227 // Here we merge the quick match details of the two branches. |
| 2228 details->Merge(&new_details, characters_filled_in); |
| 2229 } |
| 2230 } |
| 2231 |
| 2232 |
| 2233 // We call this repeatedly to generate code for each pass over the text node. |
| 2234 // The passes are in increasing order of difficulty because we hope one |
| 2235 // of the first passes will fail in which case we are saved the work of the |
| 2236 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ |
| 2237 // we will check the '%' in the first pass, the case independent 'a' in the |
| 2238 // second pass and the character class in the last pass. |
| 2239 // |
| 2240 // The passes are done from right to left, so for example to test for /bar/ |
| 2241 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 |
| 2242 // and then a 'b' with offset 0. This means we can avoid the end-of-input |
| 2243 // bounds check most of the time. In the example we only need to check for |
| 2244 // end-of-input when loading the putative 'r'. |
| 2245 // |
| 2246 // A slight complication involves the fact that the first character may already |
| 2247 // be fetched into a register by the previous node. In this case we want to |
| 2248 // do the test for that character first. We do this in separate passes. The |
| 2249 // 'preloaded' argument indicates that we are doing such a 'pass'. If such a |
| 2250 // pass has been performed then subsequent passes will have true in |
| 2251 // first_element_checked to indicate that that character does not need to be |
| 2252 // checked again. |
| 2253 // |
| 2254 // In addition to all this we are passed a GenerationVariant, which can |
| 2255 // contain an AlternativeGeneration object. In this AlternativeGeneration |
| 2256 // object we can see details of any quick check that was already passed in |
| 2257 // order to get to the code we are now generating. The quick check can involve |
| 2258 // loading characters, which means we do not need to recheck the bounds |
| 2259 // up to the limit the quick check already checked. In addition the quick |
| 2260 // check can have involved a mask and compare operation which may simplify |
| 2261 // or obviate the need for further checks at some character positions. |
| 2262 void TextNode::TextEmitPass(RegExpCompiler* compiler, |
| 2263 TextEmitPassType pass, |
| 2264 bool preloaded, |
| 2265 GenerationVariant* variant, |
| 2266 bool first_element_checked, |
| 2267 int* checked_up_to) { |
| 2268 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2269 bool ascii = compiler->ascii(); |
| 2270 Label* backtrack = variant->backtrack(); |
| 2271 QuickCheckDetails* quick_check = variant->quick_check_performed(); |
| 2272 int element_count = elms_->length(); |
| 2273 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
| 2274 TextElement elm = elms_->at(i); |
| 2275 int cp_offset = variant->cp_offset() + elm.cp_offset; |
| 2276 if (elm.type == TextElement::ATOM) { |
| 2277 if (pass == NON_ASCII_MATCH || |
| 2278 pass == CHARACTER_MATCH || |
| 2279 pass == CASE_CHARACTER_MATCH) { |
| 2280 Vector<const uc16> quarks = elm.data.u_atom->data(); |
| 2281 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
| 2282 bool bound_checked = true; // Most ops will check their bounds. |
| 2283 if (first_element_checked && i == 0 && j == 0) continue; |
| 2284 if (quick_check != NULL && |
| 2285 elm.cp_offset + j < quick_check->characters() && |
| 2286 quick_check->positions(elm.cp_offset + j)->determines_perfectly) { |
| 2287 continue; |
| 2288 } |
| 2289 if (pass == NON_ASCII_MATCH) { |
| 2290 ASSERT(ascii); |
| 2291 if (quarks[j] > String::kMaxAsciiCharCode) { |
| 2292 assembler->GoTo(backtrack); |
| 2293 return; |
| 2294 } |
| 2295 } else if (pass == CHARACTER_MATCH) { |
| 2296 if (compiler->ignore_case()) { |
| 2297 bound_checked = EmitAtomNonLetter(assembler, |
| 2298 quarks[j], |
| 2299 backtrack, |
| 2300 cp_offset + j, |
| 2301 *checked_up_to < cp_offset + j, |
| 2302 preloaded); |
| 2303 } else { |
| 2304 if (!preloaded) { |
| 2305 assembler->LoadCurrentCharacter(cp_offset + j, |
| 2306 backtrack, |
| 2307 *checked_up_to < cp_offset + j); |
| 2308 } |
| 2309 assembler->CheckNotCharacter(quarks[j], backtrack); |
| 2310 } |
| 2311 } else { |
| 2312 ASSERT_EQ(pass, CASE_CHARACTER_MATCH); |
| 2313 ASSERT(compiler->ignore_case()); |
| 2314 bound_checked = EmitAtomLetter(assembler, |
| 2315 quarks[j], |
| 2316 backtrack, |
| 2317 cp_offset + j, |
| 2318 *checked_up_to < cp_offset + j, |
| 2319 preloaded); |
| 2320 } |
| 2321 if (pass != NON_ASCII_MATCH && bound_checked) { |
| 2322 if (cp_offset + j > *checked_up_to) { |
| 2323 *checked_up_to = cp_offset + j; |
| 2324 } |
| 2325 } |
| 2326 } |
| 2327 } |
| 2328 } else { |
| 2329 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); |
| 2330 if (first_element_checked && i == 0) continue; |
| 2331 if (quick_check != NULL && |
| 2332 elm.cp_offset < quick_check->characters() && |
| 2333 quick_check->positions(elm.cp_offset)->determines_perfectly) { |
| 2334 continue; |
| 2335 } |
| 2336 if (pass == CHARACTER_CLASS_MATCH) { |
| 2337 RegExpCharacterClass* cc = elm.data.u_char_class; |
| 2338 EmitCharClass(assembler, |
| 2339 cc, |
| 2340 cp_offset, |
| 2341 backtrack, |
| 2342 *checked_up_to < cp_offset, |
| 2343 ascii, |
| 2344 preloaded); |
| 2345 if (cp_offset > *checked_up_to) { |
| 2346 *checked_up_to = cp_offset; |
| 2347 } |
| 2348 } |
| 2349 } |
| 2350 } |
| 2351 } |
| 2352 |
| 2353 |
| 2354 int TextNode::Length() { |
| 2355 TextElement elm = elms_->last(); |
| 2356 ASSERT(elm.cp_offset >= 0); |
| 2357 if (elm.type == TextElement::ATOM) { |
| 2358 return elm.cp_offset + elm.data.u_atom->data().length(); |
| 2359 } else { |
| 2360 return elm.cp_offset + 1; |
| 2361 } |
| 2362 } |
| 2363 |
| 2364 |
1911 // This generates the code to match a text node. A text node can contain | 2365 // This generates the code to match a text node. A text node can contain |
1912 // straight character sequences (possibly to be matched in a case-independent | 2366 // straight character sequences (possibly to be matched in a case-independent |
1913 // way) and character classes. In order to be most efficient we test for the | 2367 // way) and character classes. For efficiency we do not do this in a single |
1914 // simple things first and then move on to the more complicated things. The | 2368 // pass from left to right. Instead we pass over the text node several times, |
1915 // simplest thing is a non-letter or a letter if we are matching case. The | 2369 // emitting code for some character positions every time. See the comment on |
1916 // next-most simple thing is a case-independent letter. The least simple is | 2370 // TextEmitPass for details. |
1917 // a character class. Another optimization is that we test the last one first. | |
1918 // If that succeeds we don't need to test for the end of the string when we | |
1919 // load other characters. | |
1920 bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 2371 bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { |
1921 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | |
1922 Label *backtrack = variant->backtrack(); | |
1923 LimitResult limit_result = LimitVersions(compiler, variant); | 2372 LimitResult limit_result = LimitVersions(compiler, variant); |
1924 if (limit_result == FAIL) return false; | 2373 if (limit_result == FAIL) return false; |
1925 if (limit_result == DONE) return true; | 2374 if (limit_result == DONE) return true; |
1926 ASSERT(limit_result == CONTINUE); | 2375 ASSERT(limit_result == CONTINUE); |
1927 | 2376 |
1928 int element_count = elms_->length(); | 2377 if (info()->follows_word_interest || |
1929 ASSERT(element_count != 0); | 2378 info()->follows_newline_interest || |
| 2379 info()->follows_start_interest) { |
| 2380 return false; |
| 2381 } |
| 2382 |
1930 if (info()->at_end) { | 2383 if (info()->at_end) { |
1931 macro_assembler->GoTo(backtrack); | 2384 compiler->macro_assembler()->GoTo(variant->backtrack()); |
1932 return true; | 2385 return true; |
1933 } | 2386 } |
1934 // First check for non-ASCII text. | 2387 |
1935 // TODO(plesner): We should do this at node level. | |
1936 if (compiler->ascii()) { | 2388 if (compiler->ascii()) { |
1937 for (int i = element_count - 1; i >= 0; i--) { | 2389 int dummy = 0; |
1938 TextElement elm = elms_->at(i); | 2390 TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy); |
1939 if (elm.type == TextElement::ATOM) { | 2391 } |
1940 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2392 |
1941 for (int j = quarks.length() - 1; j >= 0; j--) { | 2393 bool first_elt_done = false; |
1942 if (quarks[j] > String::kMaxAsciiCharCode) { | 2394 int bound_checked_to = variant->cp_offset() - 1; |
1943 macro_assembler->GoTo(backtrack); | 2395 QuickCheckDetails* quick_check = variant->quick_check_performed(); |
1944 return true; | 2396 if (quick_check != NULL) { |
1945 } | 2397 bound_checked_to += quick_check->characters(); |
1946 } | 2398 } |
1947 } else { | 2399 |
1948 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 2400 // If a character is preloaded into the current character register then |
1949 } | 2401 // check that now. |
1950 } | 2402 if (variant->characters_preloaded() == 1) { |
1951 } | 2403 TextEmitPass(compiler, |
1952 // Second, handle straight character matches. | 2404 CHARACTER_MATCH, |
1953 int checked_up_to = -1; | 2405 true, |
1954 for (int i = element_count - 1; i >= 0; i--) { | 2406 variant, |
1955 TextElement elm = elms_->at(i); | 2407 false, |
1956 ASSERT(elm.cp_offset >= 0); | 2408 &bound_checked_to); |
1957 int cp_offset = variant->cp_offset() + elm.cp_offset; | 2409 if (compiler->ignore_case()) { |
1958 if (elm.type == TextElement::ATOM) { | 2410 TextEmitPass(compiler, |
1959 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2411 CASE_CHARACTER_MATCH, |
1960 int last_cp_offset = cp_offset + quarks.length(); | 2412 true, |
1961 if (compiler->ignore_case()) { | 2413 variant, |
1962 EmitAtomNonLetters(macro_assembler, | 2414 false, |
1963 elm, | 2415 &bound_checked_to); |
1964 quarks, | 2416 } |
1965 backtrack, | 2417 TextEmitPass(compiler, |
1966 cp_offset, | 2418 CHARACTER_CLASS_MATCH, |
1967 checked_up_to < last_cp_offset); | 2419 true, |
1968 } else { | 2420 variant, |
1969 macro_assembler->CheckCharacters(quarks, | 2421 false, |
1970 cp_offset, | 2422 &bound_checked_to); |
1971 backtrack, | 2423 first_elt_done = true; |
1972 checked_up_to < last_cp_offset); | 2424 } |
1973 } | 2425 |
1974 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; | 2426 TextEmitPass(compiler, |
1975 } else { | 2427 CHARACTER_MATCH, |
1976 ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 2428 false, |
1977 } | 2429 variant, |
1978 } | 2430 first_elt_done, |
1979 // Third, handle case independent letter matches if any. | 2431 &bound_checked_to); |
1980 if (compiler->ignore_case()) { | 2432 if (compiler->ignore_case()) { |
1981 for (int i = element_count - 1; i >= 0; i--) { | 2433 TextEmitPass(compiler, |
1982 TextElement elm = elms_->at(i); | 2434 CASE_CHARACTER_MATCH, |
1983 int cp_offset = variant->cp_offset() + elm.cp_offset; | 2435 false, |
1984 if (elm.type == TextElement::ATOM) { | 2436 variant, |
1985 Vector<const uc16> quarks = elm.data.u_atom->data(); | 2437 first_elt_done, |
1986 int last_cp_offset = cp_offset + quarks.length(); | 2438 &bound_checked_to); |
1987 EmitAtomLetters(macro_assembler, | 2439 } |
1988 elm, | 2440 TextEmitPass(compiler, |
1989 quarks, | 2441 CHARACTER_CLASS_MATCH, |
1990 backtrack, | 2442 false, |
1991 cp_offset, | 2443 variant, |
1992 checked_up_to < last_cp_offset); | 2444 first_elt_done, |
1993 if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; | 2445 &bound_checked_to); |
1994 } | 2446 |
1995 } | 2447 GenerationVariant successor_variant(*variant); |
1996 } | 2448 successor_variant.AdvanceVariant(Length(), compiler->ascii()); |
1997 // If the fast character matches passed then do the character classes. | |
1998 for (int i = element_count - 1; i >= 0; i--) { | |
1999 TextElement elm = elms_->at(i); | |
2000 int cp_offset = variant->cp_offset() + elm.cp_offset; | |
2001 if (elm.type == TextElement::CHAR_CLASS) { | |
2002 RegExpCharacterClass* cc = elm.data.u_char_class; | |
2003 EmitCharClass(macro_assembler, | |
2004 cc, | |
2005 cp_offset, | |
2006 backtrack, | |
2007 checked_up_to < cp_offset, | |
2008 compiler->ascii()); | |
2009 if (cp_offset > checked_up_to) checked_up_to = cp_offset; | |
2010 } | |
2011 } | |
2012 | |
2013 GenerationVariant new_variant(*variant); | |
2014 new_variant.set_cp_offset(checked_up_to + 1); | |
2015 RecursionCheck rc(compiler); | 2449 RecursionCheck rc(compiler); |
2016 return on_success()->Emit(compiler, &new_variant); | 2450 return on_success()->Emit(compiler, &successor_variant); |
| 2451 } |
| 2452 |
| 2453 |
| 2454 void GenerationVariant::AdvanceVariant(int by, bool ascii) { |
| 2455 ASSERT(by > 0); |
| 2456 // We don't have an instruction for shifting the current character register |
| 2457 // down or for using a shifted value for anything so lets just forget that |
| 2458 // we preloaded any characters into it. |
| 2459 characters_preloaded_ = 0; |
| 2460 // Adjust the offsets of the quick check performed information. This |
| 2461 // information is used to find out what we already determined about the |
| 2462 // characters by means of mask and compare. |
| 2463 quick_check_performed_.Advance(by, ascii); |
| 2464 cp_offset_ += by; |
2017 } | 2465 } |
2018 | 2466 |
2019 | 2467 |
2020 void TextNode::MakeCaseIndependent() { | 2468 void TextNode::MakeCaseIndependent() { |
2021 int element_count = elms_->length(); | 2469 int element_count = elms_->length(); |
2022 for (int i = 0; i < element_count; i++) { | 2470 for (int i = 0; i < element_count; i++) { |
2023 TextElement elm = elms_->at(i); | 2471 TextElement elm = elms_->at(i); |
2024 if (elm.type == TextElement::CHAR_CLASS) { | 2472 if (elm.type == TextElement::CHAR_CLASS) { |
2025 RegExpCharacterClass* cc = elm.data.u_char_class; | 2473 RegExpCharacterClass* cc = elm.data.u_char_class; |
2026 ZoneList<CharacterRange>* ranges = cc->ranges(); | 2474 ZoneList<CharacterRange>* ranges = cc->ranges(); |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2103 return true; | 2551 return true; |
2104 } | 2552 } |
2105 ASSERT(variant->stop_node() == NULL); | 2553 ASSERT(variant->stop_node() == NULL); |
2106 if (!variant->is_trivial()) { | 2554 if (!variant->is_trivial()) { |
2107 return variant->Flush(compiler, this); | 2555 return variant->Flush(compiler, this); |
2108 } | 2556 } |
2109 return ChoiceNode::Emit(compiler, variant); | 2557 return ChoiceNode::Emit(compiler, variant); |
2110 } | 2558 } |
2111 | 2559 |
2112 | 2560 |
| 2561 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, |
| 2562 int start_from_node) { |
| 2563 bool ascii = compiler->ascii(); |
| 2564 int preload_characters = ChoiceNode::EatsAtLeastHelper(0, start_from_node); |
| 2565 #ifdef CAN_READ_UNALIGNED |
| 2566 if (ascii) { |
| 2567 if (preload_characters > 4) preload_characters = 4; |
| 2568 // We can't preload 3 characters because there is no machine instruction |
| 2569 // to do that. We can't just load 4 because we could be reading |
| 2570 // beyond the end of the string, which could cause a memory fault. |
| 2571 if (preload_characters == 3) preload_characters = 2; |
| 2572 } else { |
| 2573 if (preload_characters > 2) preload_characters = 2; |
| 2574 } |
| 2575 #else |
| 2576 if (preload_characters > 1) preload_characters = 1; |
| 2577 #endif |
| 2578 return preload_characters; |
| 2579 } |
| 2580 |
| 2581 |
| 2582 // This class is used when generating the alternatives in a choice node. It |
| 2583 // records the way the alternative is being code generated. |
| 2584 class AlternativeGeneration: public Malloced { |
| 2585 public: |
| 2586 AlternativeGeneration() |
| 2587 : possible_success(), |
| 2588 expects_preload(false), |
| 2589 after(), |
| 2590 quick_check_details() { } |
| 2591 Label possible_success; |
| 2592 bool expects_preload; |
| 2593 Label after; |
| 2594 QuickCheckDetails quick_check_details; |
| 2595 }; |
| 2596 |
| 2597 |
| 2598 // Creates a list of AlternativeGenerations. If the list has a reasonable |
| 2599 // size then it is on the stack, otherwise the excess is on the heap. |
| 2600 class AlternativeGenerationList { |
| 2601 public: |
| 2602 explicit AlternativeGenerationList(int count) |
| 2603 : alt_gens_(count) { |
| 2604 for (int i = 0; i < count && i < kAFew; i++) { |
| 2605 alt_gens_.Add(a_few_alt_gens_ + i); |
| 2606 } |
| 2607 for (int i = kAFew; i < count; i++) { |
| 2608 alt_gens_.Add(new AlternativeGeneration()); |
| 2609 } |
| 2610 } |
| 2611 ~AlternativeGenerationList() { |
| 2612 for (int i = 0; i < alt_gens_.length(); i++) { |
| 2613 alt_gens_[i]->possible_success.Unuse(); |
| 2614 alt_gens_[i]->after.Unuse(); |
| 2615 } |
| 2616 for (int i = kAFew; i < alt_gens_.length(); i++) { |
| 2617 delete alt_gens_[i]; |
| 2618 alt_gens_[i] = NULL; |
| 2619 } |
| 2620 } |
| 2621 |
| 2622 AlternativeGeneration* at(int i) { |
| 2623 return alt_gens_[i]; |
| 2624 } |
| 2625 private: |
| 2626 static const int kAFew = 10; |
| 2627 ZoneList<AlternativeGeneration*> alt_gens_; |
| 2628 AlternativeGeneration a_few_alt_gens_[kAFew]; |
| 2629 }; |
| 2630 |
| 2631 |
| 2632 /* Code generation for choice nodes. |
| 2633 * |
| 2634 * We generate quick checks that do a mask and compare to eliminate a |
| 2635 * choice. If the quick check succeeds then it jumps to the continuation to |
| 2636 * do slow checks and check subsequent nodes. If it fails (the common case) |
| 2637 * it falls through to the next choice. |
| 2638 * |
| 2639 * Here is the desired flow graph. Nodes directly below each other imply |
| 2640 * fallthrough. Alternatives 1 and 2 have quick checks. Alternative |
| 2641 * 3 doesn't have a quick check so we have to call the slow check. |
| 2642 * Nodes are marked Qn for quick checks and Sn for slow checks. The entire |
| 2643 * regexp continuation is generated directly after the Sn node, up to the |
| 2644 * next GoTo if we decide to reuse some already generated code. Some |
| 2645 * nodes expect preload_characters to be preloaded into the current |
| 2646 * character register. R nodes do this preloading. Vertices are marked |
| 2647 * F for failures and S for success (possible success in the case of quick |
| 2648 * nodes). L, V, < and > are used as arrow heads. |
| 2649 * |
| 2650 * ----------> R |
| 2651 * | |
| 2652 * V |
| 2653 * Q1 -----> S1 |
| 2654 * | S / |
| 2655 * F| / |
| 2656 * | F/ |
| 2657 * | / |
| 2658 * | R |
| 2659 * | / |
| 2660 * V L |
| 2661 * Q2 -----> S2 |
| 2662 * | S / |
| 2663 * F| / |
| 2664 * | F/ |
| 2665 * | / |
| 2666 * | R |
| 2667 * | / |
| 2668 * V L |
| 2669 * S3 |
| 2670 * | |
| 2671 * F| |
| 2672 * | |
| 2673 * R |
| 2674 * | |
| 2675 * backtrack V |
| 2676 * <----------Q4 |
| 2677 * \ F | |
| 2678 * \ |S |
| 2679 * \ F V |
| 2680 * \-----S4 |
| 2681 * |
| 2682 * For greedy loops we reverse our expectation and expect to match rather |
| 2683 * than fail. Therefore we want the loop code to look like this (U is the |
| 2684 * unwind code that steps back in the greedy loop). The following alternatives |
| 2685 * look the same as above. |
| 2686 * _____ |
| 2687 * / \ |
| 2688 * V | |
| 2689 * ----------> S1 | |
| 2690 * /| | |
| 2691 * / |S | |
| 2692 * F/ \_____/ |
| 2693 * / |
| 2694 * |<----------- |
| 2695 * | \ |
| 2696 * V \ |
| 2697 * Q2 ---> S2 \ |
| 2698 * | S / | |
| 2699 * F| / | |
| 2700 * | F/ | |
| 2701 * | / | |
| 2702 * | R | |
| 2703 * | / | |
| 2704 * F VL | |
| 2705 * <------U | |
| 2706 * back |S | |
| 2707 * \______________/ |
| 2708 */ |
| 2709 |
| 2710 |
2113 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 2711 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { |
2114 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 2712 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
2115 int choice_count = alternatives_->length(); | 2713 int choice_count = alternatives_->length(); |
2116 #ifdef DEBUG | 2714 #ifdef DEBUG |
2117 for (int i = 0; i < choice_count - 1; i++) { | 2715 for (int i = 0; i < choice_count - 1; i++) { |
2118 GuardedAlternative alternative = alternatives_->at(i); | 2716 GuardedAlternative alternative = alternatives_->at(i); |
2119 ZoneList<Guard*>* guards = alternative.guards(); | 2717 ZoneList<Guard*>* guards = alternative.guards(); |
2120 int guard_count = (guards == NULL) ? 0 : guards->length(); | 2718 int guard_count = (guards == NULL) ? 0 : guards->length(); |
2121 for (int j = 0; j < guard_count; j++) { | 2719 for (int j = 0; j < guard_count; j++) { |
2122 ASSERT(!variant->mentions_reg(guards->at(j)->reg())); | 2720 ASSERT(!variant->mentions_reg(guards->at(j)->reg())); |
2123 } | 2721 } |
2124 } | 2722 } |
2125 #endif | 2723 #endif |
2126 | 2724 |
2127 LimitResult limit_result = LimitVersions(compiler, variant); | 2725 LimitResult limit_result = LimitVersions(compiler, variant); |
2128 if (limit_result == DONE) return true; | 2726 if (limit_result == DONE) return true; |
2129 if (limit_result == FAIL) return false; | 2727 if (limit_result == FAIL) return false; |
2130 ASSERT(limit_result == CONTINUE); | 2728 ASSERT(limit_result == CONTINUE); |
2131 | 2729 |
2132 RecursionCheck rc(compiler); | 2730 RecursionCheck rc(compiler); |
2133 | 2731 |
2134 GenerationVariant* current_variant = variant; | 2732 GenerationVariant* current_variant = variant; |
2135 | 2733 |
2136 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); | 2734 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); |
2137 bool greedy_loop = false; | 2735 bool greedy_loop = false; |
2138 Label greedy_loop_label; | 2736 Label greedy_loop_label; |
2139 GenerationVariant counter_backtrack_variant(&greedy_loop_label); | 2737 GenerationVariant counter_backtrack_variant; |
| 2738 counter_backtrack_variant.set_backtrack(&greedy_loop_label); |
2140 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | 2739 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { |
2141 // Here we have special handling for greedy loops containing only text nodes | 2740 // Here we have special handling for greedy loops containing only text nodes |
2142 // and other simple nodes. These are handled by pushing the current | 2741 // and other simple nodes. These are handled by pushing the current |
2143 // position on the stack and then incrementing the current position each | 2742 // position on the stack and then incrementing the current position each |
2144 // time around the switch. On backtrack we decrement the current position | 2743 // time around the switch. On backtrack we decrement the current position |
2145 // and check it against the pushed value. This avoids pushing backtrack | 2744 // and check it against the pushed value. This avoids pushing backtrack |
2146 // information for each iteration of the loop, which could take up a lot of | 2745 // information for each iteration of the loop, which could take up a lot of |
2147 // space. | 2746 // space. |
2148 greedy_loop = true; | 2747 greedy_loop = true; |
2149 ASSERT(variant->stop_node() == NULL); | 2748 ASSERT(variant->stop_node() == NULL); |
2150 macro_assembler->PushCurrentPosition(); | 2749 macro_assembler->PushCurrentPosition(); |
2151 current_variant = &counter_backtrack_variant; | 2750 current_variant = &counter_backtrack_variant; |
2152 Label greedy_match_failed; | 2751 Label greedy_match_failed; |
2153 GenerationVariant greedy_match_variant(&greedy_match_failed); | 2752 GenerationVariant greedy_match_variant; |
| 2753 greedy_match_variant.set_backtrack(&greedy_match_failed); |
2154 Label loop_label; | 2754 Label loop_label; |
2155 macro_assembler->Bind(&loop_label); | 2755 macro_assembler->Bind(&loop_label); |
2156 greedy_match_variant.set_stop_node(this); | 2756 greedy_match_variant.set_stop_node(this); |
2157 greedy_match_variant.set_loop_label(&loop_label); | 2757 greedy_match_variant.set_loop_label(&loop_label); |
2158 bool ok = alternatives_->at(0).node()->Emit(compiler, | 2758 bool ok = alternatives_->at(0).node()->Emit(compiler, |
2159 &greedy_match_variant); | 2759 &greedy_match_variant); |
2160 macro_assembler->Bind(&greedy_match_failed); | 2760 macro_assembler->Bind(&greedy_match_failed); |
2161 if (!ok) { | 2761 if (!ok) { |
2162 greedy_loop_label.Unuse(); | 2762 greedy_loop_label.Unuse(); |
2163 return false; | 2763 return false; |
2164 } | 2764 } |
2165 } | 2765 } |
2166 | 2766 |
2167 Label second_choice; // For use in greedy matches. | 2767 Label second_choice; // For use in greedy matches. |
2168 macro_assembler->Bind(&second_choice); | 2768 macro_assembler->Bind(&second_choice); |
2169 | 2769 |
| 2770 int first_normal_choice = greedy_loop ? 1 : 0; |
| 2771 |
| 2772 int preload_characters = CalculatePreloadCharacters(compiler, |
| 2773 first_normal_choice); |
| 2774 bool preload_is_current = false; |
| 2775 bool preload_has_checked_bounds = false; |
| 2776 |
| 2777 AlternativeGenerationList alt_gens(choice_count); |
| 2778 |
2170 // For now we just call all choices one after the other. The idea ultimately | 2779 // For now we just call all choices one after the other. The idea ultimately |
2171 // is to use the Dispatch table to try only the relevant ones. | 2780 // is to use the Dispatch table to try only the relevant ones. |
2172 for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) { | 2781 for (int i = first_normal_choice; i < choice_count; i++) { |
2173 GuardedAlternative alternative = alternatives_->at(i); | 2782 GuardedAlternative alternative = alternatives_->at(i); |
2174 Label after; | 2783 AlternativeGeneration* alt_gen(alt_gens.at(i)); |
| 2784 alt_gen->quick_check_details.set_characters(preload_characters); |
2175 ZoneList<Guard*>* guards = alternative.guards(); | 2785 ZoneList<Guard*>* guards = alternative.guards(); |
2176 int guard_count = (guards == NULL) ? 0 : guards->length(); | 2786 int guard_count = (guards == NULL) ? 0 : guards->length(); |
| 2787 |
2177 GenerationVariant new_variant(*current_variant); | 2788 GenerationVariant new_variant(*current_variant); |
2178 new_variant.set_backtrack(&after); | 2789 new_variant.set_characters_preloaded(preload_is_current ? |
2179 for (int j = 0; j < guard_count; j++) { | 2790 preload_characters : |
2180 GenerateGuard(macro_assembler, guards->at(j), &new_variant); | 2791 0); |
| 2792 new_variant.quick_check_performed()->Clear(); |
| 2793 alt_gen->expects_preload = preload_is_current; |
| 2794 bool generate_full_check_inline = false; |
| 2795 if (alternative.node()->EmitQuickCheck(compiler, |
| 2796 &new_variant, |
| 2797 preload_has_checked_bounds, |
| 2798 &alt_gen->possible_success, |
| 2799 &alt_gen->quick_check_details, |
| 2800 i < choice_count - 1)) { |
| 2801 // Quick check was generated for this choice. |
| 2802 preload_is_current = true; |
| 2803 preload_has_checked_bounds = true; |
| 2804 // On the last choice in the ChoiceNode we generated the quick |
| 2805 // check to fall through on possible success. So now we need to |
| 2806 // generate the full check inline. |
| 2807 if (i == choice_count - 1) { |
| 2808 macro_assembler->Bind(&alt_gen->possible_success); |
| 2809 new_variant.set_quick_check_performed(&alt_gen->quick_check_details); |
| 2810 generate_full_check_inline = true; |
| 2811 } |
| 2812 } else { |
| 2813 // No quick check was generated. Put the full code here. |
| 2814 if (i < choice_count - 1) { |
| 2815 new_variant.set_backtrack(&alt_gen->after); |
| 2816 } |
| 2817 generate_full_check_inline = true; |
2181 } | 2818 } |
2182 if (!alternative.node()->Emit(compiler, &new_variant)) { | 2819 if (generate_full_check_inline) { |
2183 after.Unuse(); | 2820 if (preload_is_current) { |
2184 return false; | 2821 new_variant.set_characters_preloaded(preload_characters); |
| 2822 } |
| 2823 for (int j = 0; j < guard_count; j++) { |
| 2824 GenerateGuard(macro_assembler, guards->at(j), &new_variant); |
| 2825 } |
| 2826 if (!alternative.node()->Emit(compiler, &new_variant)) { |
| 2827 greedy_loop_label.Unuse(); |
| 2828 return false; |
| 2829 } |
| 2830 preload_is_current = false; |
2185 } | 2831 } |
2186 macro_assembler->Bind(&after); | 2832 macro_assembler->Bind(&alt_gen->after); |
2187 } | 2833 } |
2188 GuardedAlternative alternative = alternatives_->at(choice_count - 1); | |
2189 ZoneList<Guard*>* guards = alternative.guards(); | |
2190 int guard_count = (guards == NULL) ? 0 : guards->length(); | |
2191 for (int j = 0; j < guard_count; j++) { | |
2192 GenerateGuard(macro_assembler, guards->at(j), current_variant); | |
2193 } | |
2194 bool ok = alternative.node()->Emit(compiler, current_variant); | |
2195 if (!ok) return false; | |
2196 if (greedy_loop) { | 2834 if (greedy_loop) { |
2197 macro_assembler->Bind(&greedy_loop_label); | 2835 macro_assembler->Bind(&greedy_loop_label); |
2198 // If we have unwound to the bottom then backtrack. | 2836 // If we have unwound to the bottom then backtrack. |
2199 macro_assembler->CheckGreedyLoop(variant->backtrack()); | 2837 macro_assembler->CheckGreedyLoop(variant->backtrack()); |
2200 // Otherwise try the second priority at an earlier position. | 2838 // Otherwise try the second priority at an earlier position. |
2201 macro_assembler->AdvanceCurrentPosition(-text_length); | 2839 macro_assembler->AdvanceCurrentPosition(-text_length); |
2202 macro_assembler->GoTo(&second_choice); | 2840 macro_assembler->GoTo(&second_choice); |
2203 } | 2841 } |
| 2842 // At this point we need to generate slow checks for the alternatives where |
| 2843 // the quick check was inlined. We can recognize these because the associated |
| 2844 // label was bound. |
| 2845 for (int i = first_normal_choice; i < choice_count - 1; i++) { |
| 2846 AlternativeGeneration* alt_gen = alt_gens.at(i); |
| 2847 if (!EmitOutOfLineContinuation(compiler, |
| 2848 current_variant, |
| 2849 alternatives_->at(i), |
| 2850 alt_gen, |
| 2851 preload_characters, |
| 2852 alt_gens.at(i + 1)->expects_preload)) { |
| 2853 return false; |
| 2854 } |
| 2855 } |
2204 return true; | 2856 return true; |
2205 } | 2857 } |
2206 | 2858 |
2207 | 2859 |
| 2860 bool ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, |
| 2861 GenerationVariant* variant, |
| 2862 GuardedAlternative alternative, |
| 2863 AlternativeGeneration* alt_gen, |
| 2864 int preload_characters, |
| 2865 bool next_expects_preload) { |
| 2866 if (!alt_gen->possible_success.is_linked()) return true; |
| 2867 |
| 2868 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 2869 macro_assembler->Bind(&alt_gen->possible_success); |
| 2870 GenerationVariant out_of_line_variant(*variant); |
| 2871 out_of_line_variant.set_characters_preloaded(preload_characters); |
| 2872 out_of_line_variant.set_quick_check_performed(&alt_gen->quick_check_details); |
| 2873 ZoneList<Guard*>* guards = alternative.guards(); |
| 2874 int guard_count = (guards == NULL) ? 0 : guards->length(); |
| 2875 if (next_expects_preload) { |
| 2876 Label reload_current_char; |
| 2877 out_of_line_variant.set_backtrack(&reload_current_char); |
| 2878 for (int j = 0; j < guard_count; j++) { |
| 2879 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant); |
| 2880 } |
| 2881 bool ok = alternative.node()->Emit(compiler, &out_of_line_variant); |
| 2882 macro_assembler->Bind(&reload_current_char); |
| 2883 // Reload the current character, since the next quick check expects that. |
| 2884 // We don't need to check bounds here because we only get into this |
| 2885 // code through a quick check which already did the checked load. |
| 2886 macro_assembler->LoadCurrentCharacter(variant->cp_offset(), |
| 2887 NULL, |
| 2888 false, |
| 2889 preload_characters); |
| 2890 macro_assembler->GoTo(&(alt_gen->after)); |
| 2891 return ok; |
| 2892 } else { |
| 2893 out_of_line_variant.set_backtrack(&(alt_gen->after)); |
| 2894 for (int j = 0; j < guard_count; j++) { |
| 2895 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant); |
| 2896 } |
| 2897 return alternative.node()->Emit(compiler, &out_of_line_variant); |
| 2898 } |
| 2899 } |
| 2900 |
| 2901 |
2208 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 2902 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { |
2209 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 2903 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2210 LimitResult limit_result = LimitVersions(compiler, variant); | 2904 LimitResult limit_result = LimitVersions(compiler, variant); |
2211 if (limit_result == DONE) return true; | 2905 if (limit_result == DONE) return true; |
2212 if (limit_result == FAIL) return false; | 2906 if (limit_result == FAIL) return false; |
2213 ASSERT(limit_result == CONTINUE); | 2907 ASSERT(limit_result == CONTINUE); |
2214 | 2908 |
2215 RecursionCheck rc(compiler); | 2909 RecursionCheck rc(compiler); |
2216 | 2910 |
2217 switch (type_) { | 2911 switch (type_) { |
2218 case STORE_POSITION: { | 2912 case STORE_POSITION: { |
2219 GenerationVariant::DeferredCapture | 2913 GenerationVariant::DeferredCapture |
(...skipping 11 matching lines...) Expand all Loading... |
2231 } | 2925 } |
2232 case SET_REGISTER: { | 2926 case SET_REGISTER: { |
2233 GenerationVariant::DeferredSetRegister | 2927 GenerationVariant::DeferredSetRegister |
2234 new_set(data_.u_store_register.reg, data_.u_store_register.value); | 2928 new_set(data_.u_store_register.reg, data_.u_store_register.value); |
2235 GenerationVariant new_variant = *variant; | 2929 GenerationVariant new_variant = *variant; |
2236 new_variant.add_action(&new_set); | 2930 new_variant.add_action(&new_set); |
2237 return on_success()->Emit(compiler, &new_variant); | 2931 return on_success()->Emit(compiler, &new_variant); |
2238 } | 2932 } |
2239 case BEGIN_SUBMATCH: | 2933 case BEGIN_SUBMATCH: |
2240 if (!variant->is_trivial()) return variant->Flush(compiler, this); | 2934 if (!variant->is_trivial()) return variant->Flush(compiler, this); |
2241 macro->WriteCurrentPositionToRegister( | 2935 assembler->WriteCurrentPositionToRegister( |
2242 data_.u_submatch.current_position_register, 0); | 2936 data_.u_submatch.current_position_register, 0); |
2243 macro->WriteStackPointerToRegister( | 2937 assembler->WriteStackPointerToRegister( |
2244 data_.u_submatch.stack_pointer_register); | 2938 data_.u_submatch.stack_pointer_register); |
2245 return on_success()->Emit(compiler, variant); | 2939 return on_success()->Emit(compiler, variant); |
2246 case POSITIVE_SUBMATCH_SUCCESS: | 2940 case POSITIVE_SUBMATCH_SUCCESS: |
2247 if (!variant->is_trivial()) return variant->Flush(compiler, this); | 2941 if (!variant->is_trivial()) return variant->Flush(compiler, this); |
2248 // TODO(erikcorry): Implement support. | 2942 // TODO(erikcorry): Implement support. |
2249 if (info()->follows_word_interest || | 2943 if (info()->follows_word_interest || |
2250 info()->follows_newline_interest || | 2944 info()->follows_newline_interest || |
2251 info()->follows_start_interest) { | 2945 info()->follows_start_interest) { |
2252 return false; | 2946 return false; |
2253 } | 2947 } |
2254 if (info()->at_end) { | 2948 if (info()->at_end) { |
2255 Label at_end; | 2949 Label at_end; |
2256 // Load current character jumps to the label if we are beyond the string | 2950 // Load current character jumps to the label if we are beyond the string |
2257 // end. | 2951 // end. |
2258 macro->LoadCurrentCharacter(0, &at_end); | 2952 assembler->LoadCurrentCharacter(0, &at_end); |
2259 macro->GoTo(variant->backtrack()); | 2953 assembler->GoTo(variant->backtrack()); |
2260 macro->Bind(&at_end); | 2954 assembler->Bind(&at_end); |
2261 } | 2955 } |
2262 macro->ReadCurrentPositionFromRegister( | 2956 assembler->ReadCurrentPositionFromRegister( |
2263 data_.u_submatch.current_position_register); | 2957 data_.u_submatch.current_position_register); |
2264 macro->ReadStackPointerFromRegister( | 2958 assembler->ReadStackPointerFromRegister( |
2265 data_.u_submatch.stack_pointer_register); | 2959 data_.u_submatch.stack_pointer_register); |
2266 return on_success()->Emit(compiler, variant); | 2960 return on_success()->Emit(compiler, variant); |
2267 default: | 2961 default: |
2268 UNREACHABLE(); | 2962 UNREACHABLE(); |
2269 return false; | 2963 return false; |
2270 } | 2964 } |
2271 } | 2965 } |
2272 | 2966 |
2273 | 2967 |
2274 bool BackReferenceNode::Emit(RegExpCompiler* compiler, | 2968 bool BackReferenceNode::Emit(RegExpCompiler* compiler, |
2275 GenerationVariant* variant) { | 2969 GenerationVariant* variant) { |
2276 RegExpMacroAssembler* macro = compiler->macro_assembler(); | 2970 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2277 if (!variant->is_trivial()) { | 2971 if (!variant->is_trivial()) { |
2278 return variant->Flush(compiler, this); | 2972 return variant->Flush(compiler, this); |
2279 } | 2973 } |
2280 | 2974 |
2281 LimitResult limit_result = LimitVersions(compiler, variant); | 2975 LimitResult limit_result = LimitVersions(compiler, variant); |
2282 if (limit_result == DONE) return true; | 2976 if (limit_result == DONE) return true; |
2283 if (limit_result == FAIL) return false; | 2977 if (limit_result == FAIL) return false; |
2284 ASSERT(limit_result == CONTINUE); | 2978 ASSERT(limit_result == CONTINUE); |
2285 | 2979 |
2286 RecursionCheck rc(compiler); | 2980 RecursionCheck rc(compiler); |
2287 | 2981 |
2288 ASSERT_EQ(start_reg_ + 1, end_reg_); | 2982 ASSERT_EQ(start_reg_ + 1, end_reg_); |
2289 if (info()->at_end) { | 2983 if (info()->at_end) { |
2290 // If we are constrained to match at the end of the input then succeed | 2984 // If we are constrained to match at the end of the input then succeed |
2291 // iff the back reference is empty. | 2985 // iff the back reference is empty. |
2292 macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack()); | 2986 assembler->CheckNotRegistersEqual(start_reg_, |
| 2987 end_reg_, |
| 2988 variant->backtrack()); |
2293 } else { | 2989 } else { |
2294 if (compiler->ignore_case()) { | 2990 if (compiler->ignore_case()) { |
2295 macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack()); | 2991 assembler->CheckNotBackReferenceIgnoreCase(start_reg_, |
| 2992 variant->backtrack()); |
2296 } else { | 2993 } else { |
2297 macro->CheckNotBackReference(start_reg_, variant->backtrack()); | 2994 assembler->CheckNotBackReference(start_reg_, variant->backtrack()); |
2298 } | 2995 } |
2299 } | 2996 } |
2300 return on_success()->Emit(compiler, variant); | 2997 return on_success()->Emit(compiler, variant); |
2301 } | 2998 } |
2302 | 2999 |
2303 | 3000 |
2304 // ------------------------------------------------------------------- | 3001 // ------------------------------------------------------------------- |
2305 // Dot/dotty output | 3002 // Dot/dotty output |
2306 | 3003 |
2307 | 3004 |
(...skipping 1281 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3589 EmbeddedVector<byte, 1024> codes; | 4286 EmbeddedVector<byte, 1024> codes; |
3590 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4287 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
3591 return compiler.Assemble(¯o_assembler, | 4288 return compiler.Assemble(¯o_assembler, |
3592 node, | 4289 node, |
3593 data->capture_count, | 4290 data->capture_count, |
3594 pattern); | 4291 pattern); |
3595 } | 4292 } |
3596 | 4293 |
3597 | 4294 |
3598 }} // namespace v8::internal | 4295 }} // namespace v8::internal |
OLD | NEW |