| OLD | NEW | 
|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without | 
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are | 
| 4 // met: | 4 // met: | 
| 5 // | 5 // | 
| 6 //     * Redistributions of source code must retain the above copyright | 6 //     * Redistributions of source code must retain the above copyright | 
| 7 //       notice, this list of conditions and the following disclaimer. | 7 //       notice, this list of conditions and the following disclaimer. | 
| 8 //     * Redistributions in binary form must reproduce the above | 8 //     * Redistributions in binary form must reproduce the above | 
| 9 //       copyright notice, this list of conditions and the following | 9 //       copyright notice, this list of conditions and the following | 
| 10 //       disclaimer in the documentation and/or other materials provided | 10 //       disclaimer in the documentation and/or other materials provided | 
| (...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 207 class OffsetsVector { | 207 class OffsetsVector { | 
| 208  public: | 208  public: | 
| 209   inline OffsetsVector(int num_registers) | 209   inline OffsetsVector(int num_registers) | 
| 210       : offsets_vector_length_(num_registers) { | 210       : offsets_vector_length_(num_registers) { | 
| 211     if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 211     if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 
| 212       vector_ = NewArray<int>(offsets_vector_length_); | 212       vector_ = NewArray<int>(offsets_vector_length_); | 
| 213     } else { | 213     } else { | 
| 214       vector_ = static_offsets_vector_; | 214       vector_ = static_offsets_vector_; | 
| 215     } | 215     } | 
| 216   } | 216   } | 
| 217 |  | 
| 218 |  | 
| 219   inline ~OffsetsVector() { | 217   inline ~OffsetsVector() { | 
| 220     if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 218     if (offsets_vector_length_ > kStaticOffsetsVectorSize) { | 
| 221       DeleteArray(vector_); | 219       DeleteArray(vector_); | 
| 222       vector_ = NULL; | 220       vector_ = NULL; | 
| 223     } | 221     } | 
| 224   } | 222   } | 
| 225 | 223   inline int* vector() { return vector_; } | 
| 226 | 224   inline int length() { return offsets_vector_length_; } | 
| 227   inline int* vector() { |  | 
| 228     return vector_; |  | 
| 229   } |  | 
| 230 |  | 
| 231 |  | 
| 232   inline int length() { |  | 
| 233     return offsets_vector_length_; |  | 
| 234   } |  | 
| 235 | 225 | 
| 236  private: | 226  private: | 
| 237   int* vector_; | 227   int* vector_; | 
| 238   int offsets_vector_length_; | 228   int offsets_vector_length_; | 
| 239   static const int kStaticOffsetsVectorSize = 50; | 229   static const int kStaticOffsetsVectorSize = 50; | 
| 240   static int static_offsets_vector_[kStaticOffsetsVectorSize]; | 230   static int static_offsets_vector_[kStaticOffsetsVectorSize]; | 
| 241 }; | 231 }; | 
| 242 | 232 | 
| 243 | 233 | 
| 244 int OffsetsVector::static_offsets_vector_[ | 234 int OffsetsVector::static_offsets_vector_[ | 
| (...skipping 551 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 796   int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | 786   int previous_index = static_cast<int>(DoubleToInteger(index->Number())); | 
| 797 | 787 | 
| 798 #ifdef DEBUG | 788 #ifdef DEBUG | 
| 799   if (FLAG_trace_regexp_bytecodes) { | 789   if (FLAG_trace_regexp_bytecodes) { | 
| 800     String* pattern = regexp->Pattern(); | 790     String* pattern = regexp->Pattern(); | 
| 801     PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString())); | 791     PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString())); | 
| 802     PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 792     PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 
| 803   } | 793   } | 
| 804 #endif | 794 #endif | 
| 805   LOG(RegExpExecEvent(regexp, previous_index, subject)); | 795   LOG(RegExpExecEvent(regexp, previous_index, subject)); | 
|  | 796 | 
|  | 797   if (!subject->IsFlat(StringShape(*subject))) { | 
|  | 798     FlattenString(subject); | 
|  | 799   } | 
|  | 800 | 
| 806   return IrregexpExecOnce(irregexp, | 801   return IrregexpExecOnce(irregexp, | 
| 807                           num_captures, | 802                           num_captures, | 
| 808                           subject, | 803                           subject, | 
| 809                           previous_index, | 804                           previous_index, | 
| 810                           offsets.vector(), | 805                           offsets.vector(), | 
| 811                           offsets.length()); | 806                           offsets.length()); | 
| 812 } | 807 } | 
| 813 | 808 | 
| 814 | 809 | 
| 815 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | 810 Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp, | 
| (...skipping 14 matching lines...) Expand all  Loading... | 
| 830   int previous_index = 0; | 825   int previous_index = 0; | 
| 831 | 826 | 
| 832   Handle<JSArray> result = Factory::NewJSArray(0); | 827   Handle<JSArray> result = Factory::NewJSArray(0); | 
| 833   int i = 0; | 828   int i = 0; | 
| 834   Handle<Object> matches; | 829   Handle<Object> matches; | 
| 835 | 830 | 
| 836   if (!subject->IsFlat(shape)) { | 831   if (!subject->IsFlat(shape)) { | 
| 837     subject->Flatten(shape); | 832     subject->Flatten(shape); | 
| 838   } | 833   } | 
| 839 | 834 | 
| 840   do { | 835   while (true) { | 
| 841     if (previous_index > subject->length() || previous_index < 0) { | 836     if (previous_index > subject->length() || previous_index < 0) { | 
| 842       // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 837       // Per ECMA-262 15.10.6.2, if the previous index is greater than the | 
| 843       // string length, there is no match. | 838       // string length, there is no match. | 
| 844       matches = Factory::null_value(); | 839       matches = Factory::null_value(); | 
|  | 840       return result; | 
| 845     } else { | 841     } else { | 
| 846 #ifdef DEBUG | 842 #ifdef DEBUG | 
| 847       if (FLAG_trace_regexp_bytecodes) { | 843       if (FLAG_trace_regexp_bytecodes) { | 
| 848         String* pattern = regexp->Pattern(); | 844         String* pattern = regexp->Pattern(); | 
| 849         PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString())); | 845         PrintF("\n\nRegexp match:   /%s/\n\n", *(pattern->ToCString())); | 
| 850         PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 846         PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); | 
| 851       } | 847       } | 
| 852 #endif | 848 #endif | 
| 853       LOG(RegExpExecEvent(regexp, previous_index, subject)); | 849       LOG(RegExpExecEvent(regexp, previous_index, subject)); | 
| 854       matches = IrregexpExecOnce(irregexp, | 850       matches = IrregexpExecOnce(irregexp, | 
| 855                                  IrregexpNumberOfCaptures(irregexp), | 851                                  IrregexpNumberOfCaptures(irregexp), | 
| 856                                  subject, | 852                                  subject, | 
| 857                                  previous_index, | 853                                  previous_index, | 
| 858                                  offsets.vector(), | 854                                  offsets.vector(), | 
| 859                                  offsets.length()); | 855                                  offsets.length()); | 
| 860 | 856 | 
| 861       if (matches->IsJSArray()) { | 857       if (matches->IsJSArray()) { | 
| 862         SetElement(result, i, matches); | 858         SetElement(result, i, matches); | 
| 863         i++; | 859         i++; | 
| 864         previous_index = offsets.vector()[1]; | 860         previous_index = offsets.vector()[1]; | 
| 865         if (offsets.vector()[0] == offsets.vector()[1]) { | 861         if (offsets.vector()[0] == offsets.vector()[1]) { | 
| 866           previous_index++; | 862           previous_index++; | 
| 867         } | 863         } | 
|  | 864       } else if (matches->IsNull()) { | 
|  | 865         return result; | 
|  | 866       } else { | 
|  | 867         return matches; | 
| 868       } | 868       } | 
| 869     } | 869     } | 
| 870   } while (matches->IsJSArray()); |  | 
| 871 |  | 
| 872   // If we exited the loop with an exception, throw it. |  | 
| 873   if (matches->IsNull()) { |  | 
| 874     // Exited loop normally. |  | 
| 875     return result; |  | 
| 876   } else { |  | 
| 877     // Exited loop with the exception in matches. |  | 
| 878     return matches; |  | 
| 879   } | 870   } | 
| 880 } | 871 } | 
| 881 | 872 | 
| 882 | 873 | 
| 883 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, | 874 Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp, | 
| 884                                             int num_captures, | 875                                             int num_captures, | 
| 885                                             Handle<String> subject, | 876                                             Handle<String> subject, | 
| 886                                             int previous_index, | 877                                             int previous_index, | 
| 887                                             int* offsets_vector, | 878                                             int* offsets_vector, | 
| 888                                             int offsets_vector_length) { | 879                                             int offsets_vector_length) { | 
|  | 880   ASSERT(subject->IsFlat(StringShape(*subject))); | 
| 889   bool rc; | 881   bool rc; | 
| 890 | 882 | 
| 891   int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); | 883   int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); | 
| 892 | 884 | 
| 893   if (!subject->IsFlat(StringShape(*subject))) { |  | 
| 894     FlattenString(subject); |  | 
| 895   } |  | 
| 896 |  | 
| 897   switch (tag) { | 885   switch (tag) { | 
| 898     case RegExpMacroAssembler::kIA32Implementation: { | 886     case RegExpMacroAssembler::kIA32Implementation: { | 
| 899 #ifndef ARM | 887 #ifndef ARM | 
| 900       Handle<Code> code = IrregexpNativeCode(irregexp); | 888       Handle<Code> code = IrregexpNativeCode(irregexp); | 
| 901 | 889 | 
| 902       StringShape shape(*subject); | 890       StringShape shape(*subject); | 
| 903 | 891 | 
| 904       // Character offsets into string. | 892       // Character offsets into string. | 
| 905       int start_offset = previous_index; | 893       int start_offset = previous_index; | 
| 906       int end_offset = subject->length(shape); | 894       int end_offset = subject->length(shape); | 
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 990       rc = false; | 978       rc = false; | 
| 991       break; | 979       break; | 
| 992   } | 980   } | 
| 993 | 981 | 
| 994   if (!rc) { | 982   if (!rc) { | 
| 995     return Factory::null_value(); | 983     return Factory::null_value(); | 
| 996   } | 984   } | 
| 997 | 985 | 
| 998   Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); | 986   Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1)); | 
| 999   // The captures come in (start, end+1) pairs. | 987   // The captures come in (start, end+1) pairs. | 
| 1000   for (int i = 0; i < 2 * (num_captures+1); i += 2) { | 988   for (int i = 0; i < 2 * (num_captures + 1); i += 2) { | 
| 1001     array->set(i, Smi::FromInt(offsets_vector[i])); | 989     array->set(i, Smi::FromInt(offsets_vector[i])); | 
| 1002     array->set(i+1, Smi::FromInt(offsets_vector[i+1])); | 990     array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); | 
| 1003   } | 991   } | 
| 1004   return Factory::NewJSArrayWithElements(array); | 992   return Factory::NewJSArrayWithElements(array); | 
| 1005 } | 993 } | 
| 1006 | 994 | 
| 1007 | 995 | 
| 1008 // ------------------------------------------------------------------- | 996 // ------------------------------------------------------------------- | 
| 1009 // Implmentation of the Irregexp regular expression engine. | 997 // Implmentation of the Irregexp regular expression engine. | 
| 1010 // | 998 // | 
| 1011 // The Irregexp regular expression engine is intended to be a complete | 999 // The Irregexp regular expression engine is intended to be a complete | 
| 1012 // implementation of ECMAScript regular expressions.  It generates either | 1000 // implementation of ECMAScript regular expressions.  It generates either | 
| (...skipping 324 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1337   for (DeferredAction* action = actions_; | 1325   for (DeferredAction* action = actions_; | 
| 1338        action != NULL; | 1326        action != NULL; | 
| 1339        action = action->next()) { | 1327        action = action->next()) { | 
| 1340     affected_registers->Set(action->reg()); | 1328     affected_registers->Set(action->reg()); | 
| 1341     if (action->reg() > max_register) max_register = action->reg(); | 1329     if (action->reg() > max_register) max_register = action->reg(); | 
| 1342   } | 1330   } | 
| 1343   return max_register; | 1331   return max_register; | 
| 1344 } | 1332 } | 
| 1345 | 1333 | 
| 1346 | 1334 | 
| 1347 void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro, | 1335 void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* assembler, | 
| 1348                                               int max_register, | 1336                                               int max_register, | 
| 1349                                               OutSet& affected_registers) { | 1337                                               OutSet& affected_registers) { | 
| 1350   for (int reg = 0; reg <= max_register; reg++) { | 1338   for (int reg = 0; reg <= max_register; reg++) { | 
| 1351     if (affected_registers.Get(reg)) macro->PushRegister(reg); | 1339     if (affected_registers.Get(reg)) assembler->PushRegister(reg); | 
| 1352   } | 1340   } | 
| 1353 } | 1341 } | 
| 1354 | 1342 | 
| 1355 | 1343 | 
| 1356 void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro, | 1344 void GenerationVariant::RestoreAffectedRegisters( | 
| 1357                                                  int max_register, | 1345     RegExpMacroAssembler* assembler, | 
| 1358                                                  OutSet& affected_registers) { | 1346     int max_register, | 
|  | 1347     OutSet& affected_registers) { | 
| 1359   for (int reg = max_register; reg >= 0; reg--) { | 1348   for (int reg = max_register; reg >= 0; reg--) { | 
| 1360     if (affected_registers.Get(reg)) macro->PopRegister(reg); | 1349     if (affected_registers.Get(reg)) assembler->PopRegister(reg); | 
| 1361   } | 1350   } | 
| 1362 } | 1351 } | 
| 1363 | 1352 | 
| 1364 | 1353 | 
| 1365 void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro, | 1354 void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* assembler, | 
| 1366                                                int max_register, | 1355                                                int max_register, | 
| 1367                                                OutSet& affected_registers) { | 1356                                                OutSet& affected_registers) { | 
| 1368   for (int reg = 0; reg <= max_register; reg++) { | 1357   for (int reg = 0; reg <= max_register; reg++) { | 
| 1369     if (!affected_registers.Get(reg)) { | 1358     if (!affected_registers.Get(reg)) { | 
| 1370       continue; | 1359       continue; | 
| 1371     } | 1360     } | 
| 1372     int value = 0; | 1361     int value = 0; | 
| 1373     bool absolute = false; | 1362     bool absolute = false; | 
| 1374     int store_position = -1; | 1363     int store_position = -1; | 
| 1375     // This is a little tricky because we are scanning the actions in reverse | 1364     // This is a little tricky because we are scanning the actions in reverse | 
| (...skipping 27 matching lines...) Expand all  Loading... | 
| 1403             ASSERT_EQ(value, 0); | 1392             ASSERT_EQ(value, 0); | 
| 1404             break; | 1393             break; | 
| 1405           } | 1394           } | 
| 1406           default: | 1395           default: | 
| 1407             UNREACHABLE(); | 1396             UNREACHABLE(); | 
| 1408             break; | 1397             break; | 
| 1409         } | 1398         } | 
| 1410       } | 1399       } | 
| 1411     } | 1400     } | 
| 1412     if (store_position != -1) { | 1401     if (store_position != -1) { | 
| 1413       macro->WriteCurrentPositionToRegister(reg, store_position); | 1402       assembler->WriteCurrentPositionToRegister(reg, store_position); | 
| 1414     } else { | 1403     } else { | 
| 1415       if (absolute) { | 1404       if (absolute) { | 
| 1416         macro->SetRegister(reg, value); | 1405         assembler->SetRegister(reg, value); | 
| 1417       } else { | 1406       } else { | 
| 1418         if (value != 0) { | 1407         if (value != 0) { | 
| 1419           macro->AdvanceRegister(reg, value); | 1408           assembler->AdvanceRegister(reg, value); | 
| 1420         } | 1409         } | 
| 1421       } | 1410       } | 
| 1422     } | 1411     } | 
| 1423   } | 1412   } | 
| 1424 } | 1413 } | 
| 1425 | 1414 | 
| 1426 | 1415 | 
| 1427 // This is called as we come into a loop choice node and some other tricky | 1416 // This is called as we come into a loop choice node and some other tricky | 
| 1428 // nodes.  It normalises the state of the code generator to ensure we can | 1417 // nodes.  It normalises the state of the code generator to ensure we can | 
| 1429 // generate generic code. | 1418 // generate generic code. | 
| 1430 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | 1419 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) { | 
| 1431   RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1420   RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 
| 1432 | 1421 | 
| 1433   ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL); | 1422   ASSERT(actions_ != NULL || | 
|  | 1423          cp_offset_ != 0 || | 
|  | 1424          backtrack() != NULL || | 
|  | 1425          characters_preloaded_ != 0 || | 
|  | 1426          quick_check_performed_.characters() != 0); | 
| 1434 | 1427 | 
| 1435   if (actions_ == NULL && backtrack() == NULL) { | 1428   if (actions_ == NULL && backtrack() == NULL) { | 
| 1436     // Here we just have some deferred cp advances to fix and we are back to | 1429     // Here we just have some deferred cp advances to fix and we are back to | 
| 1437     // a normal situation. | 1430     // a normal situation.  We may also have to forget some information gained | 
| 1438     macro->AdvanceCurrentPosition(cp_offset_); | 1431     // through a quick check that was already performed. | 
|  | 1432     if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); | 
| 1439     // Create a new trivial state and generate the node with that. | 1433     // Create a new trivial state and generate the node with that. | 
| 1440     GenerationVariant new_state; | 1434     GenerationVariant new_state; | 
| 1441     return successor->Emit(compiler, &new_state); | 1435     return successor->Emit(compiler, &new_state); | 
| 1442   } | 1436   } | 
| 1443 | 1437 | 
| 1444   // Generate deferred actions here along with code to undo them again. | 1438   // Generate deferred actions here along with code to undo them again. | 
| 1445   OutSet affected_registers; | 1439   OutSet affected_registers; | 
| 1446   int max_register = FindAffectedRegisters(&affected_registers); | 1440   int max_register = FindAffectedRegisters(&affected_registers); | 
| 1447   PushAffectedRegisters(macro, max_register, affected_registers); | 1441   PushAffectedRegisters(assembler, max_register, affected_registers); | 
| 1448   PerformDeferredActions(macro, max_register, affected_registers); | 1442   PerformDeferredActions(assembler, max_register, affected_registers); | 
| 1449   if (backtrack() != NULL) { | 1443   if (backtrack() != NULL) { | 
| 1450     // Here we have a concrete backtrack location.  These are set up by choice | 1444     // Here we have a concrete backtrack location.  These are set up by choice | 
| 1451     // nodes and so they indicate that we have a deferred save of the current | 1445     // nodes and so they indicate that we have a deferred save of the current | 
| 1452     // position which we may need to emit here. | 1446     // position which we may need to emit here. | 
| 1453     macro->PushCurrentPosition(); | 1447     assembler->PushCurrentPosition(); | 
| 1454   } | 1448   } | 
| 1455   if (cp_offset_ != 0) { | 1449   if (cp_offset_ != 0) { | 
| 1456     macro->AdvanceCurrentPosition(cp_offset_); | 1450     assembler->AdvanceCurrentPosition(cp_offset_); | 
| 1457   } | 1451   } | 
| 1458 | 1452 | 
| 1459   // Create a new trivial state and generate the node with that. | 1453   // Create a new trivial state and generate the node with that. | 
| 1460   Label undo; | 1454   Label undo; | 
| 1461   macro->PushBacktrack(&undo); | 1455   assembler->PushBacktrack(&undo); | 
| 1462   GenerationVariant new_state; | 1456   GenerationVariant new_state; | 
| 1463   bool ok = successor->Emit(compiler, &new_state); | 1457   bool ok = successor->Emit(compiler, &new_state); | 
| 1464 | 1458 | 
| 1465   // On backtrack we need to restore state. | 1459   // On backtrack we need to restore state. | 
| 1466   macro->Bind(&undo); | 1460   assembler->Bind(&undo); | 
| 1467   if (!ok) return false; | 1461   if (!ok) return false; | 
| 1468   if (backtrack() != NULL) { | 1462   if (backtrack() != NULL) { | 
| 1469     macro->PopCurrentPosition(); | 1463     assembler->PopCurrentPosition(); | 
| 1470   } | 1464   } | 
| 1471   RestoreAffectedRegisters(macro, max_register, affected_registers); | 1465   RestoreAffectedRegisters(assembler, max_register, affected_registers); | 
| 1472   if (backtrack() == NULL) { | 1466   if (backtrack() == NULL) { | 
| 1473     macro->Backtrack(); | 1467     assembler->Backtrack(); | 
| 1474   } else { | 1468   } else { | 
| 1475     macro->GoTo(backtrack()); | 1469     assembler->GoTo(backtrack()); | 
| 1476   } | 1470   } | 
| 1477 | 1471 | 
| 1478   return true; | 1472   return true; | 
| 1479 } | 1473 } | 
| 1480 | 1474 | 
| 1481 | 1475 | 
| 1482 void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro, | 1476 void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler, | 
| 1483                              GenerationVariant* variant) { | 1477                              GenerationVariant* variant) { | 
| 1484   if (info()->at_end) { | 1478   if (info()->at_end) { | 
| 1485     Label succeed; | 1479     Label succeed; | 
| 1486     // LoadCurrentCharacter will go to the label if we are at the end of the | 1480     // LoadCurrentCharacter will go to the label if we are at the end of the | 
| 1487     // input string. | 1481     // input string. | 
| 1488     macro->LoadCurrentCharacter(0, &succeed); | 1482     assembler->LoadCurrentCharacter(0, &succeed); | 
| 1489     macro->GoTo(variant->backtrack()); | 1483     assembler->GoTo(variant->backtrack()); | 
| 1490     macro->Bind(&succeed); | 1484     assembler->Bind(&succeed); | 
| 1491   } | 1485   } | 
| 1492 } | 1486 } | 
| 1493 | 1487 | 
| 1494 | 1488 | 
| 1495 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, | 1489 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, | 
| 1496                                    GenerationVariant* variant) { | 1490                                    GenerationVariant* variant) { | 
| 1497   if (!variant->is_trivial()) { | 1491   if (!variant->is_trivial()) { | 
| 1498     return variant->Flush(compiler, this); | 1492     return variant->Flush(compiler, this); | 
| 1499   } | 1493   } | 
| 1500   RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1494   RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 
| 1501   if (!label()->is_bound()) { | 1495   if (!label()->is_bound()) { | 
| 1502     macro->Bind(label()); | 1496     assembler->Bind(label()); | 
| 1503   } | 1497   } | 
| 1504   EmitInfoChecks(macro, variant); | 1498   EmitInfoChecks(assembler, variant); | 
| 1505   macro->ReadCurrentPositionFromRegister(current_position_register_); | 1499   assembler->ReadCurrentPositionFromRegister(current_position_register_); | 
| 1506   macro->ReadStackPointerFromRegister(stack_pointer_register_); | 1500   assembler->ReadStackPointerFromRegister(stack_pointer_register_); | 
| 1507   // Now that we have unwound the stack we find at the top of the stack the | 1501   // Now that we have unwound the stack we find at the top of the stack the | 
| 1508   // backtrack that the BeginSubmatch node got. | 1502   // backtrack that the BeginSubmatch node got. | 
| 1509   macro->Backtrack(); | 1503   assembler->Backtrack(); | 
| 1510   return true; | 1504   return true; | 
| 1511 } | 1505 } | 
| 1512 | 1506 | 
| 1513 | 1507 | 
| 1514 bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 1508 bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 
| 1515   if (!variant->is_trivial()) { | 1509   if (!variant->is_trivial()) { | 
| 1516     return variant->Flush(compiler, this); | 1510     return variant->Flush(compiler, this); | 
| 1517   } | 1511   } | 
| 1518   RegExpMacroAssembler* macro = compiler->macro_assembler(); | 1512   RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 
| 1519   if (!label()->is_bound()) { | 1513   if (!label()->is_bound()) { | 
| 1520     macro->Bind(label()); | 1514     assembler->Bind(label()); | 
| 1521   } | 1515   } | 
| 1522   switch (action_) { | 1516   switch (action_) { | 
| 1523     case ACCEPT: | 1517     case ACCEPT: | 
| 1524       EmitInfoChecks(macro, variant); | 1518       EmitInfoChecks(assembler, variant); | 
| 1525       macro->Succeed(); | 1519       assembler->Succeed(); | 
| 1526       return true; | 1520       return true; | 
| 1527     case BACKTRACK: | 1521     case BACKTRACK: | 
| 1528       ASSERT(!info()->at_end); | 1522       ASSERT(!info()->at_end); | 
| 1529       macro->GoTo(variant->backtrack()); | 1523       assembler->GoTo(variant->backtrack()); | 
| 1530       return true; | 1524       return true; | 
| 1531     case NEGATIVE_SUBMATCH_SUCCESS: | 1525     case NEGATIVE_SUBMATCH_SUCCESS: | 
| 1532       // This case is handled in a different virtual method. | 1526       // This case is handled in a different virtual method. | 
| 1533       UNREACHABLE(); | 1527       UNREACHABLE(); | 
| 1534   } | 1528   } | 
| 1535   UNIMPLEMENTED(); | 1529   UNIMPLEMENTED(); | 
| 1536   return false; | 1530   return false; | 
| 1537 } | 1531 } | 
| 1538 | 1532 | 
| 1539 | 1533 | 
| (...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1622                                     variant->backtrack()); | 1616                                     variant->backtrack()); | 
| 1623       break; | 1617       break; | 
| 1624   } | 1618   } | 
| 1625 } | 1619 } | 
| 1626 | 1620 | 
| 1627 | 1621 | 
| 1628 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | 1622 static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize; | 
| 1629 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | 1623 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange; | 
| 1630 | 1624 | 
| 1631 | 1625 | 
| 1632 static inline void EmitAtomNonLetters( | 1626 // Only emits non-letters (things that don't have case).  Only used for case | 
|  | 1627 // independent matches. | 
|  | 1628 static inline bool EmitAtomNonLetter( | 
| 1633     RegExpMacroAssembler* macro_assembler, | 1629     RegExpMacroAssembler* macro_assembler, | 
| 1634     TextElement elm, | 1630     uc16 c, | 
| 1635     Vector<const uc16> quarks, |  | 
| 1636     Label* on_failure, | 1631     Label* on_failure, | 
| 1637     int cp_offset, | 1632     int cp_offset, | 
| 1638     bool check_offset) { | 1633     bool check, | 
|  | 1634     bool preloaded) { | 
| 1639   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1635   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 
| 1640   // It is vital that this loop is backwards due to the unchecked character | 1636   int length = uncanonicalize.get(c, '\0', chars); | 
| 1641   // load below. | 1637   bool checked = false; | 
| 1642   for (int i = quarks.length() - 1; i >= 0; i--) { | 1638   if (length <= 1) { | 
| 1643     uc16 c = quarks[i]; | 1639     if (!preloaded) { | 
| 1644     int length = uncanonicalize.get(c, '\0', chars); | 1640       macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 
| 1645     if (length <= 1) { | 1641       checked = check; | 
| 1646       if (check_offset && i == quarks.length() - 1) { |  | 
| 1647         macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); |  | 
| 1648       } else { |  | 
| 1649         // Here we don't need to check against the end of the input string |  | 
| 1650         // since this character lies before a character that matched. |  | 
| 1651         macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); |  | 
| 1652       } |  | 
| 1653       macro_assembler->CheckNotCharacter(c, on_failure); |  | 
| 1654     } | 1642     } | 
|  | 1643     macro_assembler->CheckNotCharacter(c, on_failure); | 
| 1655   } | 1644   } | 
|  | 1645   return checked; | 
| 1656 } | 1646 } | 
| 1657 | 1647 | 
| 1658 | 1648 | 
| 1659 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 1649 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, | 
| 1660                                       uc16 c1, | 1650                                       uc16 c1, | 
| 1661                                       uc16 c2, | 1651                                       uc16 c2, | 
| 1662                                       Label* on_failure) { | 1652                                       Label* on_failure) { | 
| 1663   uc16 exor = c1 ^ c2; | 1653   uc16 exor = c1 ^ c2; | 
| 1664   // Check whether exor has only one bit set. | 1654   // Check whether exor has only one bit set. | 
| 1665   if (((exor - 1) & exor) == 0) { | 1655   if (((exor - 1) & exor) == 0) { | 
| 1666     // If c1 and c2 differ only by one bit. | 1656     // If c1 and c2 differ only by one bit. | 
| 1667     // Ecma262UnCanonicalize always gives the highest number last. | 1657     // Ecma262UnCanonicalize always gives the highest number last. | 
| 1668     ASSERT(c2 > c1); | 1658     ASSERT(c2 > c1); | 
| 1669     macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure); | 1659     uc16 mask = String::kMaxUC16CharCode ^ exor; | 
|  | 1660     macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure); | 
| 1670     return true; | 1661     return true; | 
| 1671   } | 1662   } | 
| 1672   ASSERT(c2 > c1); | 1663   ASSERT(c2 > c1); | 
| 1673   uc16 diff = c2 - c1; | 1664   uc16 diff = c2 - c1; | 
| 1674   if (((diff - 1) & diff) == 0 && c1 >= diff) { | 1665   if (((diff - 1) & diff) == 0 && c1 >= diff) { | 
| 1675     // If the characters differ by 2^n but don't differ by one bit then | 1666     // If the characters differ by 2^n but don't differ by one bit then | 
| 1676     // subtract the difference from the found character, then do the or | 1667     // subtract the difference from the found character, then do the or | 
| 1677     // trick.  We avoid the theoretical case where negative numbers are | 1668     // trick.  We avoid the theoretical case where negative numbers are | 
| 1678     // involved in order to simplify code generation. | 1669     // involved in order to simplify code generation. | 
| 1679     macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff, | 1670     uc16 mask = String::kMaxUC16CharCode ^ diff; | 
| 1680                                                    diff, | 1671     macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 
| 1681                                                    on_failure); | 1672                                                     diff, | 
|  | 1673                                                     mask, | 
|  | 1674                                                     on_failure); | 
| 1682     return true; | 1675     return true; | 
| 1683   } | 1676   } | 
| 1684   return false; | 1677   return false; | 
| 1685 } | 1678 } | 
| 1686 | 1679 | 
| 1687 | 1680 | 
| 1688 static inline void EmitAtomLetters( | 1681 // Only emits letters (things that have case).  Only used for case independent | 
|  | 1682 // matches. | 
|  | 1683 static inline bool EmitAtomLetter( | 
| 1689     RegExpMacroAssembler* macro_assembler, | 1684     RegExpMacroAssembler* macro_assembler, | 
| 1690     TextElement elm, | 1685     uc16 c, | 
| 1691     Vector<const uc16> quarks, |  | 
| 1692     Label* on_failure, | 1686     Label* on_failure, | 
| 1693     int cp_offset, | 1687     int cp_offset, | 
| 1694     bool check_offset) { | 1688     bool check, | 
|  | 1689     bool preloaded) { | 
| 1695   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1690   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 
| 1696   // It is vital that this loop is backwards due to the unchecked character | 1691   int length = uncanonicalize.get(c, '\0', chars); | 
| 1697   // load below. | 1692   if (length <= 1) return false; | 
| 1698   for (int i = quarks.length() - 1; i >= 0; i--) { | 1693   // We may not need to check against the end of the input string | 
| 1699     uc16 c = quarks[i]; | 1694   // if this character lies before a character that matched. | 
| 1700     int length = uncanonicalize.get(c, '\0', chars); | 1695   if (!preloaded) { | 
| 1701     if (length <= 1) continue; | 1696     macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 
| 1702     if (check_offset && i == quarks.length() - 1) { | 1697   } | 
| 1703       macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); | 1698   Label ok; | 
| 1704     } else { | 1699   ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 
| 1705       // Here we don't need to check against the end of the input string | 1700   switch (length) { | 
| 1706       // since this character lies before a character that matched. | 1701     case 2: { | 
| 1707       macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i); | 1702       if (ShortCutEmitCharacterPair(macro_assembler, | 
|  | 1703                                     chars[0], | 
|  | 1704                                     chars[1], | 
|  | 1705                                     on_failure)) { | 
|  | 1706       } else { | 
|  | 1707         macro_assembler->CheckCharacter(chars[0], &ok); | 
|  | 1708         macro_assembler->CheckNotCharacter(chars[1], on_failure); | 
|  | 1709         macro_assembler->Bind(&ok); | 
|  | 1710       } | 
|  | 1711       break; | 
| 1708     } | 1712     } | 
| 1709     Label ok; | 1713     case 4: | 
| 1710     ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1714       macro_assembler->CheckCharacter(chars[3], &ok); | 
| 1711     switch (length) { | 1715       // Fall through! | 
| 1712       case 2: { | 1716     case 3: | 
| 1713         if (ShortCutEmitCharacterPair(macro_assembler, | 1717       macro_assembler->CheckCharacter(chars[0], &ok); | 
| 1714                                       chars[0], | 1718       macro_assembler->CheckCharacter(chars[1], &ok); | 
| 1715                                       chars[1], | 1719       macro_assembler->CheckNotCharacter(chars[2], on_failure); | 
| 1716                                       on_failure)) { | 1720       macro_assembler->Bind(&ok); | 
| 1717         } else { | 1721       break; | 
| 1718           macro_assembler->CheckCharacter(chars[0], &ok); | 1722     default: | 
| 1719           macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1723       UNREACHABLE(); | 
| 1720           macro_assembler->Bind(&ok); | 1724       break; | 
| 1721         } |  | 
| 1722         break; |  | 
| 1723       } |  | 
| 1724       case 4: |  | 
| 1725         macro_assembler->CheckCharacter(chars[3], &ok); |  | 
| 1726         // Fall through! |  | 
| 1727       case 3: |  | 
| 1728         macro_assembler->CheckCharacter(chars[0], &ok); |  | 
| 1729         macro_assembler->CheckCharacter(chars[1], &ok); |  | 
| 1730         macro_assembler->CheckNotCharacter(chars[2], on_failure); |  | 
| 1731         macro_assembler->Bind(&ok); |  | 
| 1732         break; |  | 
| 1733       default: |  | 
| 1734         UNREACHABLE(); |  | 
| 1735         break; |  | 
| 1736     } |  | 
| 1737   } | 1725   } | 
|  | 1726   return true; | 
| 1738 } | 1727 } | 
| 1739 | 1728 | 
| 1740 | 1729 | 
| 1741 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1730 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 
| 1742                           RegExpCharacterClass* cc, | 1731                           RegExpCharacterClass* cc, | 
| 1743                           int cp_offset, | 1732                           int cp_offset, | 
| 1744                           Label* on_failure, | 1733                           Label* on_failure, | 
| 1745                           bool check_offset, | 1734                           bool check_offset, | 
| 1746                           bool ascii) { | 1735                           bool ascii, | 
|  | 1736                           bool preloaded) { | 
| 1747   ZoneList<CharacterRange>* ranges = cc->ranges(); | 1737   ZoneList<CharacterRange>* ranges = cc->ranges(); | 
| 1748   int max_char; | 1738   int max_char; | 
| 1749   if (ascii) { | 1739   if (ascii) { | 
| 1750     max_char = String::kMaxAsciiCharCode; | 1740     max_char = String::kMaxAsciiCharCode; | 
| 1751   } else { | 1741   } else { | 
| 1752     max_char = String::kMaxUC16CharCode; | 1742     max_char = String::kMaxUC16CharCode; | 
| 1753   } | 1743   } | 
| 1754 | 1744 | 
| 1755   Label success; | 1745   Label success; | 
| 1756 | 1746 | 
| (...skipping 25 matching lines...) Expand all  Loading... | 
| 1782       ranges->at(0).IsEverything(max_char)) { | 1772       ranges->at(0).IsEverything(max_char)) { | 
| 1783     // This is a common case hit by non-anchored expressions. | 1773     // This is a common case hit by non-anchored expressions. | 
| 1784     // TODO(erikcorry): We should have a macro assembler instruction that just | 1774     // TODO(erikcorry): We should have a macro assembler instruction that just | 
| 1785     // checks for end of string without loading the character. | 1775     // checks for end of string without loading the character. | 
| 1786     if (check_offset) { | 1776     if (check_offset) { | 
| 1787       macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1777       macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 
| 1788     } | 1778     } | 
| 1789     return; | 1779     return; | 
| 1790   } | 1780   } | 
| 1791 | 1781 | 
| 1792   if (check_offset) { | 1782   if (!preloaded) { | 
| 1793     macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); | 1783     macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 
| 1794   } else { |  | 
| 1795     // Here we don't need to check against the end of the input string |  | 
| 1796     // since this character lies before a character that matched. |  | 
| 1797     macro_assembler->LoadCurrentCharacterUnchecked(cp_offset); |  | 
| 1798   } | 1784   } | 
| 1799 | 1785 | 
| 1800   for (int i = 0; i <= last_valid_range; i++) { | 1786   for (int i = 0; i < last_valid_range; i++) { | 
| 1801     CharacterRange& range = ranges->at(i); | 1787     CharacterRange& range = ranges->at(i); | 
| 1802     Label next_range; | 1788     Label next_range; | 
| 1803     uc16 from = range.from(); | 1789     uc16 from = range.from(); | 
| 1804     uc16 to = range.to(); | 1790     uc16 to = range.to(); | 
| 1805     if (from > max_char) { | 1791     if (from > max_char) { | 
| 1806       continue; | 1792       continue; | 
| 1807     } | 1793     } | 
| 1808     if (to > max_char) to = max_char; | 1794     if (to > max_char) to = max_char; | 
| 1809     if (to == from) { | 1795     if (to == from) { | 
| 1810       macro_assembler->CheckCharacter(to, char_is_in_class); | 1796       macro_assembler->CheckCharacter(to, char_is_in_class); | 
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 1851     } else { | 1837     } else { | 
| 1852       if (cc->is_negated()) { | 1838       if (cc->is_negated()) { | 
| 1853         macro_assembler->GoTo(on_failure); | 1839         macro_assembler->GoTo(on_failure); | 
| 1854       } | 1840       } | 
| 1855     } | 1841     } | 
| 1856   } | 1842   } | 
| 1857   macro_assembler->Bind(&success); | 1843   macro_assembler->Bind(&success); | 
| 1858 } | 1844 } | 
| 1859 | 1845 | 
| 1860 | 1846 | 
|  | 1847 RegExpNode::~RegExpNode() { | 
|  | 1848 } | 
|  | 1849 | 
|  | 1850 | 
| 1861 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 1851 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, | 
| 1862                                                   GenerationVariant* variant) { | 1852                                                   GenerationVariant* variant) { | 
| 1863   // TODO(erikcorry): Implement support. | 1853   // TODO(erikcorry): Implement support. | 
| 1864   if (info_.follows_word_interest || | 1854   if (info_.follows_word_interest || | 
| 1865       info_.follows_newline_interest || | 1855       info_.follows_newline_interest || | 
| 1866       info_.follows_start_interest) { | 1856       info_.follows_start_interest) { | 
| 1867     return FAIL; | 1857     return FAIL; | 
| 1868   } | 1858   } | 
| 1869 | 1859 | 
| 1870   // If we are generating a greedy loop then don't stop and don't reuse code. | 1860   // If we are generating a greedy loop then don't stop and don't reuse code. | 
| (...skipping 30 matching lines...) Expand all  Loading... | 
| 1901   } | 1891   } | 
| 1902 | 1892 | 
| 1903   // If we get here there have been too many variants generated or recursion | 1893   // If we get here there have been too many variants generated or recursion | 
| 1904   // is too deep.  Time to switch to a generic version.  The code for | 1894   // is too deep.  Time to switch to a generic version.  The code for | 
| 1905   // generic versions above can handle deep recursion properly. | 1895   // generic versions above can handle deep recursion properly. | 
| 1906   bool ok = variant->Flush(compiler, this); | 1896   bool ok = variant->Flush(compiler, this); | 
| 1907   return ok ? DONE : FAIL; | 1897   return ok ? DONE : FAIL; | 
| 1908 } | 1898 } | 
| 1909 | 1899 | 
| 1910 | 1900 | 
|  | 1901 int ActionNode::EatsAtLeast(int recursion_depth) { | 
|  | 1902   if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 
|  | 1903   if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0;  // Rewinds input! | 
|  | 1904   return on_success()->EatsAtLeast(recursion_depth + 1); | 
|  | 1905 } | 
|  | 1906 | 
|  | 1907 | 
|  | 1908 int TextNode::EatsAtLeast(int recursion_depth) { | 
|  | 1909   int answer = Length(); | 
|  | 1910   if (answer >= 4) return answer; | 
|  | 1911   if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer; | 
|  | 1912   return answer + on_success()->EatsAtLeast(recursion_depth + 1); | 
|  | 1913 } | 
|  | 1914 | 
|  | 1915 | 
|  | 1916 | 
|  | 1917 int ChoiceNode::EatsAtLeastHelper(int recursion_depth, int start_from_node) { | 
|  | 1918   if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 
|  | 1919   int min = 100; | 
|  | 1920   int choice_count = alternatives_->length(); | 
|  | 1921   for (int i = start_from_node; i < choice_count; i++) { | 
|  | 1922     RegExpNode* node = alternatives_->at(i).node(); | 
|  | 1923     int node_eats_at_least = node->EatsAtLeast(recursion_depth + 1); | 
|  | 1924     if (node_eats_at_least < min) min = node_eats_at_least; | 
|  | 1925   } | 
|  | 1926   return min; | 
|  | 1927 } | 
|  | 1928 | 
|  | 1929 | 
|  | 1930 int LoopChoiceNode::EatsAtLeast(int recursion_depth) { | 
|  | 1931   return 0; | 
|  | 1932 } | 
|  | 1933 | 
|  | 1934 | 
|  | 1935 int ChoiceNode::EatsAtLeast(int recursion_depth) { | 
|  | 1936   return EatsAtLeastHelper(recursion_depth, 0); | 
|  | 1937 } | 
|  | 1938 | 
|  | 1939 | 
|  | 1940 // Takes the left-most 1-bit and smears it out, setting all bits to its right. | 
|  | 1941 static inline uint32_t SmearBitsRight(uint32_t v) { | 
|  | 1942   v |= v >> 1; | 
|  | 1943   v |= v >> 2; | 
|  | 1944   v |= v >> 4; | 
|  | 1945   v |= v >> 8; | 
|  | 1946   v |= v >> 16; | 
|  | 1947   return v; | 
|  | 1948 } | 
|  | 1949 | 
|  | 1950 | 
|  | 1951 bool QuickCheckDetails::Rationalize(bool asc) { | 
|  | 1952   bool found_useful_op = false; | 
|  | 1953   uint32_t char_mask; | 
|  | 1954   if (asc) { | 
|  | 1955     char_mask = String::kMaxAsciiCharCode; | 
|  | 1956   } else { | 
|  | 1957     char_mask = String::kMaxUC16CharCode; | 
|  | 1958   } | 
|  | 1959   mask_ = 0; | 
|  | 1960   value_ = 0; | 
|  | 1961   int char_shift = 0; | 
|  | 1962   for (int i = 0; i < characters_; i++) { | 
|  | 1963     Position* pos = &positions_[i]; | 
|  | 1964     if ((pos->mask & String::kMaxAsciiCharCode) != 0) { | 
|  | 1965       found_useful_op = true; | 
|  | 1966     } | 
|  | 1967     mask_ |= (pos->mask & char_mask) << char_shift; | 
|  | 1968     value_ |= (pos->value & char_mask) << char_shift; | 
|  | 1969     char_shift += asc ? 8 : 16; | 
|  | 1970   } | 
|  | 1971   return found_useful_op; | 
|  | 1972 } | 
|  | 1973 | 
|  | 1974 | 
|  | 1975 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler, | 
|  | 1976                                 GenerationVariant* variant, | 
|  | 1977                                 bool preload_has_checked_bounds, | 
|  | 1978                                 Label* on_possible_success, | 
|  | 1979                                 QuickCheckDetails* details, | 
|  | 1980                                 bool fall_through_on_failure) { | 
|  | 1981   if (details->characters() == 0) return false; | 
|  | 1982   GetQuickCheckDetails(details, compiler, 0); | 
|  | 1983   if (!details->Rationalize(compiler->ascii())) return false; | 
|  | 1984   uint32_t mask = details->mask(); | 
|  | 1985   uint32_t value = details->value(); | 
|  | 1986 | 
|  | 1987   RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 
|  | 1988 | 
|  | 1989   if (variant->characters_preloaded() != details->characters()) { | 
|  | 1990     assembler->LoadCurrentCharacter(variant->cp_offset(), | 
|  | 1991                                     variant->backtrack(), | 
|  | 1992                                     !preload_has_checked_bounds, | 
|  | 1993                                     details->characters()); | 
|  | 1994   } | 
|  | 1995 | 
|  | 1996 | 
|  | 1997   bool need_mask = true; | 
|  | 1998 | 
|  | 1999   if (details->characters() == 1) { | 
|  | 2000     // If number of characters preloaded is 1 then we used a byte or 16 bit | 
|  | 2001     // load so the value is already masked down. | 
|  | 2002     uint32_t char_mask; | 
|  | 2003     if (compiler->ascii()) { | 
|  | 2004       char_mask = String::kMaxAsciiCharCode; | 
|  | 2005     } else { | 
|  | 2006       char_mask = String::kMaxUC16CharCode; | 
|  | 2007     } | 
|  | 2008     if ((mask & char_mask) == char_mask) need_mask = false; | 
|  | 2009   } else { | 
|  | 2010     // For 2-character preloads in ASCII mode we also use a 16 bit load with | 
|  | 2011     // zero extend. | 
|  | 2012     if (details->characters() == 2 && compiler->ascii()) { | 
|  | 2013       if ((mask & 0xffff) == 0xffff) need_mask = false; | 
|  | 2014     } else { | 
|  | 2015       if (mask == 0xffffffff) need_mask = false; | 
|  | 2016     } | 
|  | 2017   } | 
|  | 2018 | 
|  | 2019   if (fall_through_on_failure) { | 
|  | 2020     if (need_mask) { | 
|  | 2021       assembler->CheckCharacterAfterAnd(value, mask, on_possible_success); | 
|  | 2022     } else { | 
|  | 2023       assembler->CheckCharacter(value, on_possible_success); | 
|  | 2024     } | 
|  | 2025   } else { | 
|  | 2026     if (need_mask) { | 
|  | 2027       assembler->CheckNotCharacterAfterAnd(value, mask, variant->backtrack()); | 
|  | 2028     } else { | 
|  | 2029       assembler->CheckNotCharacter(value, variant->backtrack()); | 
|  | 2030     } | 
|  | 2031   } | 
|  | 2032   return true; | 
|  | 2033 } | 
|  | 2034 | 
|  | 2035 | 
|  | 2036 // Here is the meat of GetQuickCheckDetails (see also the comment on the | 
|  | 2037 // super-class in the .h file). | 
|  | 2038 // | 
|  | 2039 // We iterate along the text object, building up for each character a | 
|  | 2040 // mask and value that can be used to test for a quick failure to match. | 
|  | 2041 // The masks and values for the positions will be combined into a single | 
|  | 2042 // machine word for the current character width in order to be used in | 
|  | 2043 // generating a quick check. | 
|  | 2044 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 
|  | 2045                                     RegExpCompiler* compiler, | 
|  | 2046                                     int characters_filled_in) { | 
|  | 2047   ASSERT(characters_filled_in < details->characters()); | 
|  | 2048   int characters = details->characters(); | 
|  | 2049   int char_mask; | 
|  | 2050   int char_shift; | 
|  | 2051   if (compiler->ascii()) { | 
|  | 2052     char_mask = String::kMaxAsciiCharCode; | 
|  | 2053     char_shift = 8; | 
|  | 2054   } else { | 
|  | 2055     char_mask = String::kMaxUC16CharCode; | 
|  | 2056     char_shift = 16; | 
|  | 2057   } | 
|  | 2058   for (int k = 0; k < elms_->length(); k++) { | 
|  | 2059     TextElement elm = elms_->at(k); | 
|  | 2060     if (elm.type == TextElement::ATOM) { | 
|  | 2061       Vector<const uc16> quarks = elm.data.u_atom->data(); | 
|  | 2062       for (int i = 0; i < characters && i < quarks.length(); i++) { | 
|  | 2063         QuickCheckDetails::Position* pos = | 
|  | 2064             details->positions(characters_filled_in); | 
|  | 2065         if (compiler->ignore_case()) { | 
|  | 2066           unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 
|  | 2067           uc16 c = quarks[i]; | 
|  | 2068           int length = uncanonicalize.get(c, '\0', chars); | 
|  | 2069           if (length < 2) { | 
|  | 2070             // This letter has no case equivalents, so it's nice and simple | 
|  | 2071             // and the mask-compare will determine definitely whether we have | 
|  | 2072             // a match at this character position. | 
|  | 2073             pos->mask = char_mask; | 
|  | 2074             pos->value = c; | 
|  | 2075             pos->determines_perfectly = true; | 
|  | 2076           } else { | 
|  | 2077             uint32_t common_bits = char_mask; | 
|  | 2078             uint32_t bits = chars[0]; | 
|  | 2079             for (int j = 1; j < length; j++) { | 
|  | 2080               uint32_t differing_bits = ((chars[j] & common_bits) ^ bits); | 
|  | 2081               common_bits ^= differing_bits; | 
|  | 2082               bits &= common_bits; | 
|  | 2083             } | 
|  | 2084             // If length is 2 and common bits has only one zero in it then | 
|  | 2085             // our mask and compare instruction will determine definitely | 
|  | 2086             // whether we have a match at this character position.  Otherwise | 
|  | 2087             // it can only be an approximate check. | 
|  | 2088             uint32_t one_zero = (common_bits | ~char_mask); | 
|  | 2089             if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) { | 
|  | 2090               pos->determines_perfectly = true; | 
|  | 2091             } | 
|  | 2092             pos->mask = common_bits; | 
|  | 2093             pos->value = bits; | 
|  | 2094           } | 
|  | 2095         } else { | 
|  | 2096           // Don't ignore case.  Nice simple case where the mask-compare will | 
|  | 2097           // determine definitely whether we have a match at this character | 
|  | 2098           // position. | 
|  | 2099           pos->mask = char_mask; | 
|  | 2100           pos->value = quarks[i]; | 
|  | 2101           pos->determines_perfectly = true; | 
|  | 2102         } | 
|  | 2103         characters_filled_in++; | 
|  | 2104         ASSERT(characters_filled_in <= details->characters()); | 
|  | 2105         if (characters_filled_in == details->characters()) { | 
|  | 2106           return; | 
|  | 2107         } | 
|  | 2108       } | 
|  | 2109     } else { | 
|  | 2110       QuickCheckDetails::Position* pos = | 
|  | 2111           details->positions(characters_filled_in); | 
|  | 2112       RegExpCharacterClass* tree = elm.data.u_char_class; | 
|  | 2113       ZoneList<CharacterRange>* ranges = tree->ranges(); | 
|  | 2114       CharacterRange range = ranges->at(0); | 
|  | 2115       if (tree->is_negated()) { | 
|  | 2116         // A quick check uses multi-character mask and compare.  There is no | 
|  | 2117         // useful way to incorporate a negative char class into this scheme | 
|  | 2118         // so we just conservatively create a mask and value that will always | 
|  | 2119         // succeed. | 
|  | 2120         pos->mask = 0; | 
|  | 2121         pos->value = 0; | 
|  | 2122       } else { | 
|  | 2123         uint32_t differing_bits = (range.from() ^ range.to()); | 
|  | 2124         // A mask and compare is only perfect if the differing bits form a | 
|  | 2125         // number like 00011111 with one single block of trailing 1s. | 
|  | 2126         if ((differing_bits & (differing_bits + 1)) == 0) { | 
|  | 2127           pos->determines_perfectly = true; | 
|  | 2128         } | 
|  | 2129         uint32_t common_bits = ~SmearBitsRight(differing_bits); | 
|  | 2130         uint32_t bits = (range.from() & common_bits); | 
|  | 2131         for (int i = 1; i < ranges->length(); i++) { | 
|  | 2132           // Here we are combining more ranges into the mask and compare | 
|  | 2133           // value.  With each new range the mask becomes more sparse and | 
|  | 2134           // so the chances of a false positive rise.  A character class | 
|  | 2135           // with multiple ranges is assumed never to be equivalent to a | 
|  | 2136           // mask and compare operation. | 
|  | 2137           pos->determines_perfectly = false; | 
|  | 2138           CharacterRange range = ranges->at(i); | 
|  | 2139           uint32_t new_common_bits = (range.from() ^ range.to()); | 
|  | 2140           new_common_bits = ~SmearBitsRight(new_common_bits); | 
|  | 2141           common_bits &= new_common_bits; | 
|  | 2142           bits &= new_common_bits; | 
|  | 2143           uint32_t differing_bits = (range.from() & common_bits) ^ bits; | 
|  | 2144           common_bits ^= differing_bits; | 
|  | 2145           bits &= common_bits; | 
|  | 2146         } | 
|  | 2147         pos->mask = common_bits; | 
|  | 2148         pos->value = bits; | 
|  | 2149       } | 
|  | 2150       characters_filled_in++; | 
|  | 2151       ASSERT(characters_filled_in <= details->characters()); | 
|  | 2152       if (characters_filled_in == details->characters()) { | 
|  | 2153         return; | 
|  | 2154       } | 
|  | 2155     } | 
|  | 2156   } | 
|  | 2157   ASSERT(characters_filled_in != details->characters()); | 
|  | 2158   on_success()-> GetQuickCheckDetails(details, compiler, characters_filled_in); | 
|  | 2159 } | 
|  | 2160 | 
|  | 2161 | 
|  | 2162 void QuickCheckDetails::Clear() { | 
|  | 2163   for (int i = 0; i < characters_; i++) { | 
|  | 2164     positions_[i].mask = 0; | 
|  | 2165     positions_[i].value = 0; | 
|  | 2166     positions_[i].determines_perfectly = false; | 
|  | 2167   } | 
|  | 2168   characters_ = 0; | 
|  | 2169 } | 
|  | 2170 | 
|  | 2171 | 
|  | 2172 void QuickCheckDetails::Advance(int by, bool ascii) { | 
|  | 2173   ASSERT(by > 0); | 
|  | 2174   if (by >= characters_) { | 
|  | 2175     Clear(); | 
|  | 2176     return; | 
|  | 2177   } | 
|  | 2178   for (int i = 0; i < characters_ - by; i++) { | 
|  | 2179     positions_[i] = positions_[by + i]; | 
|  | 2180   } | 
|  | 2181   for (int i = characters_ - by; i < characters_; i++) { | 
|  | 2182     positions_[i].mask = 0; | 
|  | 2183     positions_[i].value = 0; | 
|  | 2184     positions_[i].determines_perfectly = false; | 
|  | 2185   } | 
|  | 2186   characters_ -= by; | 
|  | 2187   // We could change mask_ and value_ here but we would never advance unless | 
|  | 2188   // they had already been used in a check and they won't be used again because | 
|  | 2189   // it would gain us nothing.  So there's no point. | 
|  | 2190 } | 
|  | 2191 | 
|  | 2192 | 
|  | 2193 void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) { | 
|  | 2194   ASSERT(characters_ == other->characters_); | 
|  | 2195   for (int i = from_index; i < characters_; i++) { | 
|  | 2196     QuickCheckDetails::Position* pos = positions(i); | 
|  | 2197     QuickCheckDetails::Position* other_pos = other->positions(i); | 
|  | 2198     if (pos->mask != other_pos->mask || | 
|  | 2199         pos->value != other_pos->value || | 
|  | 2200         !other_pos->determines_perfectly) { | 
|  | 2201       // Our mask-compare operation will be approximate unless we have the | 
|  | 2202       // exact same operation on both sides of the alternation. | 
|  | 2203       pos->determines_perfectly = false; | 
|  | 2204     } | 
|  | 2205     pos->mask &= other_pos->mask; | 
|  | 2206     pos->value &= pos->mask; | 
|  | 2207     other_pos->value &= pos->mask; | 
|  | 2208     uc16 differing_bits = (pos->value ^ other_pos->value); | 
|  | 2209     pos->mask &= ~differing_bits; | 
|  | 2210     pos->value &= pos->mask; | 
|  | 2211   } | 
|  | 2212 } | 
|  | 2213 | 
|  | 2214 | 
|  | 2215 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 
|  | 2216                                       RegExpCompiler* compiler, | 
|  | 2217                                       int characters_filled_in) { | 
|  | 2218   int choice_count = alternatives_->length(); | 
|  | 2219   ASSERT(choice_count > 0); | 
|  | 2220   alternatives_->at(0).node()->GetQuickCheckDetails(details, | 
|  | 2221                                                     compiler, | 
|  | 2222                                                     characters_filled_in); | 
|  | 2223   for (int i = 1; i < choice_count; i++) { | 
|  | 2224     QuickCheckDetails new_details(details->characters()); | 
|  | 2225     RegExpNode* node = alternatives_->at(i).node(); | 
|  | 2226     node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in); | 
|  | 2227     // Here we merge the quick match details of the two branches. | 
|  | 2228     details->Merge(&new_details, characters_filled_in); | 
|  | 2229   } | 
|  | 2230 } | 
|  | 2231 | 
|  | 2232 | 
|  | 2233 // We call this repeatedly to generate code for each pass over the text node. | 
|  | 2234 // The passes are in increasing order of difficulty because we hope one | 
|  | 2235 // of the first passes will fail in which case we are saved the work of the | 
|  | 2236 // later passes.  for example for the case independent regexp /%[asdfghjkl]a/ | 
|  | 2237 // we will check the '%' in the first pass, the case independent 'a' in the | 
|  | 2238 // second pass and the character class in the last pass. | 
|  | 2239 // | 
|  | 2240 // The passes are done from right to left, so for example to test for /bar/ | 
|  | 2241 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 | 
|  | 2242 // and then a 'b' with offset 0.  This means we can avoid the end-of-input | 
|  | 2243 // bounds check most of the time.  In the example we only need to check for | 
|  | 2244 // end-of-input when loading the putative 'r'. | 
|  | 2245 // | 
|  | 2246 // A slight complication involves the fact that the first character may already | 
|  | 2247 // be fetched into a register by the previous node.  In this case we want to | 
|  | 2248 // do the test for that character first.  We do this in separate passes.  The | 
|  | 2249 // 'preloaded' argument indicates that we are doing such a 'pass'.  If such a | 
|  | 2250 // pass has been performed then subsequent passes will have true in | 
|  | 2251 // first_element_checked to indicate that that character does not need to be | 
|  | 2252 // checked again. | 
|  | 2253 // | 
|  | 2254 // In addition to all this we are passed a GenerationVariant, which can | 
|  | 2255 // contain an AlternativeGeneration object.  In this AlternativeGeneration | 
|  | 2256 // object we can see details of any quick check that was already passed in | 
|  | 2257 // order to get to the code we are now generating.  The quick check can involve | 
|  | 2258 // loading characters, which means we do not need to recheck the bounds | 
|  | 2259 // up to the limit the quick check already checked.  In addition the quick | 
|  | 2260 // check can have involved a mask and compare operation which may simplify | 
|  | 2261 // or obviate the need for further checks at some character positions. | 
|  | 2262 void TextNode::TextEmitPass(RegExpCompiler* compiler, | 
|  | 2263                             TextEmitPassType pass, | 
|  | 2264                             bool preloaded, | 
|  | 2265                             GenerationVariant* variant, | 
|  | 2266                             bool first_element_checked, | 
|  | 2267                             int* checked_up_to) { | 
|  | 2268   RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 
|  | 2269   bool ascii = compiler->ascii(); | 
|  | 2270   Label* backtrack = variant->backtrack(); | 
|  | 2271   QuickCheckDetails* quick_check = variant->quick_check_performed(); | 
|  | 2272   int element_count = elms_->length(); | 
|  | 2273   for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 
|  | 2274     TextElement elm = elms_->at(i); | 
|  | 2275     int cp_offset = variant->cp_offset() + elm.cp_offset; | 
|  | 2276     if (elm.type == TextElement::ATOM) { | 
|  | 2277       if (pass == NON_ASCII_MATCH || | 
|  | 2278           pass == CHARACTER_MATCH || | 
|  | 2279           pass == CASE_CHARACTER_MATCH) { | 
|  | 2280         Vector<const uc16> quarks = elm.data.u_atom->data(); | 
|  | 2281         for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 
|  | 2282           bool bound_checked = true;  // Most ops will check their bounds. | 
|  | 2283           if (first_element_checked && i == 0 && j == 0) continue; | 
|  | 2284           if (quick_check != NULL && | 
|  | 2285               elm.cp_offset + j < quick_check->characters() && | 
|  | 2286               quick_check->positions(elm.cp_offset + j)->determines_perfectly) { | 
|  | 2287             continue; | 
|  | 2288           } | 
|  | 2289           if (pass == NON_ASCII_MATCH) { | 
|  | 2290             ASSERT(ascii); | 
|  | 2291             if (quarks[j] > String::kMaxAsciiCharCode) { | 
|  | 2292               assembler->GoTo(backtrack); | 
|  | 2293               return; | 
|  | 2294             } | 
|  | 2295           } else if (pass == CHARACTER_MATCH) { | 
|  | 2296             if (compiler->ignore_case()) { | 
|  | 2297               bound_checked = EmitAtomNonLetter(assembler, | 
|  | 2298                                                 quarks[j], | 
|  | 2299                                                 backtrack, | 
|  | 2300                                                 cp_offset + j, | 
|  | 2301                                                 *checked_up_to < cp_offset + j, | 
|  | 2302                                                 preloaded); | 
|  | 2303             } else { | 
|  | 2304               if (!preloaded) { | 
|  | 2305                 assembler->LoadCurrentCharacter(cp_offset + j, | 
|  | 2306                                                 backtrack, | 
|  | 2307                                                 *checked_up_to < cp_offset + j); | 
|  | 2308               } | 
|  | 2309               assembler->CheckNotCharacter(quarks[j], backtrack); | 
|  | 2310             } | 
|  | 2311           } else { | 
|  | 2312             ASSERT_EQ(pass, CASE_CHARACTER_MATCH); | 
|  | 2313             ASSERT(compiler->ignore_case()); | 
|  | 2314             bound_checked = EmitAtomLetter(assembler, | 
|  | 2315                                            quarks[j], | 
|  | 2316                                            backtrack, | 
|  | 2317                                            cp_offset + j, | 
|  | 2318                                            *checked_up_to < cp_offset + j, | 
|  | 2319                                            preloaded); | 
|  | 2320           } | 
|  | 2321           if (pass != NON_ASCII_MATCH && bound_checked) { | 
|  | 2322             if (cp_offset + j > *checked_up_to) { | 
|  | 2323               *checked_up_to = cp_offset + j; | 
|  | 2324             } | 
|  | 2325           } | 
|  | 2326         } | 
|  | 2327       } | 
|  | 2328     } else { | 
|  | 2329       ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 
|  | 2330       if (first_element_checked && i == 0) continue; | 
|  | 2331       if (quick_check != NULL && | 
|  | 2332           elm.cp_offset < quick_check->characters() && | 
|  | 2333           quick_check->positions(elm.cp_offset)->determines_perfectly) { | 
|  | 2334         continue; | 
|  | 2335       } | 
|  | 2336       if (pass == CHARACTER_CLASS_MATCH) { | 
|  | 2337         RegExpCharacterClass* cc = elm.data.u_char_class; | 
|  | 2338         EmitCharClass(assembler, | 
|  | 2339                       cc, | 
|  | 2340                       cp_offset, | 
|  | 2341                       backtrack, | 
|  | 2342                       *checked_up_to < cp_offset, | 
|  | 2343                       ascii, | 
|  | 2344                       preloaded); | 
|  | 2345         if (cp_offset > *checked_up_to) { | 
|  | 2346           *checked_up_to = cp_offset; | 
|  | 2347         } | 
|  | 2348       } | 
|  | 2349     } | 
|  | 2350   } | 
|  | 2351 } | 
|  | 2352 | 
|  | 2353 | 
|  | 2354 int TextNode::Length() { | 
|  | 2355   TextElement elm = elms_->last(); | 
|  | 2356   ASSERT(elm.cp_offset >= 0); | 
|  | 2357   if (elm.type == TextElement::ATOM) { | 
|  | 2358     return elm.cp_offset + elm.data.u_atom->data().length(); | 
|  | 2359   } else { | 
|  | 2360     return elm.cp_offset + 1; | 
|  | 2361   } | 
|  | 2362 } | 
|  | 2363 | 
|  | 2364 | 
| 1911 // This generates the code to match a text node.  A text node can contain | 2365 // This generates the code to match a text node.  A text node can contain | 
| 1912 // straight character sequences (possibly to be matched in a case-independent | 2366 // straight character sequences (possibly to be matched in a case-independent | 
| 1913 // way) and character classes.  In order to be most efficient we test for the | 2367 // way) and character classes.  For efficiency we do not do this in a single | 
| 1914 // simple things first and then move on to the more complicated things.  The | 2368 // pass from left to right.  Instead we pass over the text node several times, | 
| 1915 // simplest thing is a non-letter or a letter if we are matching case.  The | 2369 // emitting code for some character positions every time.  See the comment on | 
| 1916 // next-most simple thing is a case-independent letter.  The least simple is | 2370 // TextEmitPass for details. | 
| 1917 // a character class.  Another optimization is that we test the last one first. |  | 
| 1918 // If that succeeds we don't need to test for the end of the string when we |  | 
| 1919 // load other characters. |  | 
| 1920 bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 2371 bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 
| 1921   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |  | 
| 1922   Label *backtrack = variant->backtrack(); |  | 
| 1923   LimitResult limit_result = LimitVersions(compiler, variant); | 2372   LimitResult limit_result = LimitVersions(compiler, variant); | 
| 1924   if (limit_result == FAIL) return false; | 2373   if (limit_result == FAIL) return false; | 
| 1925   if (limit_result == DONE) return true; | 2374   if (limit_result == DONE) return true; | 
| 1926   ASSERT(limit_result == CONTINUE); | 2375   ASSERT(limit_result == CONTINUE); | 
| 1927 | 2376 | 
| 1928   int element_count = elms_->length(); | 2377   if (info()->follows_word_interest || | 
| 1929   ASSERT(element_count != 0); | 2378       info()->follows_newline_interest || | 
|  | 2379       info()->follows_start_interest) { | 
|  | 2380     return false; | 
|  | 2381   } | 
|  | 2382 | 
| 1930   if (info()->at_end) { | 2383   if (info()->at_end) { | 
| 1931     macro_assembler->GoTo(backtrack); | 2384     compiler->macro_assembler()->GoTo(variant->backtrack()); | 
| 1932     return true; | 2385     return true; | 
| 1933   } | 2386   } | 
| 1934   // First check for non-ASCII text. | 2387 | 
| 1935   // TODO(plesner): We should do this at node level. |  | 
| 1936   if (compiler->ascii()) { | 2388   if (compiler->ascii()) { | 
| 1937     for (int i = element_count - 1; i >= 0; i--) { | 2389     int dummy = 0; | 
| 1938       TextElement elm = elms_->at(i); | 2390     TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy); | 
| 1939       if (elm.type == TextElement::ATOM) { | 2391   } | 
| 1940         Vector<const uc16> quarks = elm.data.u_atom->data(); | 2392 | 
| 1941         for (int j = quarks.length() - 1; j >= 0; j--) { | 2393   bool first_elt_done = false; | 
| 1942           if (quarks[j] > String::kMaxAsciiCharCode) { | 2394   int bound_checked_to = variant->cp_offset() - 1; | 
| 1943             macro_assembler->GoTo(backtrack); | 2395   QuickCheckDetails* quick_check = variant->quick_check_performed(); | 
| 1944             return true; | 2396   if (quick_check != NULL) { | 
| 1945           } | 2397     bound_checked_to += quick_check->characters(); | 
| 1946         } | 2398   } | 
| 1947       } else { | 2399 | 
| 1948         ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 2400   // If a character is preloaded into the current character register then | 
| 1949       } | 2401   // check that now. | 
| 1950     } | 2402   if (variant->characters_preloaded() == 1) { | 
| 1951   } | 2403     TextEmitPass(compiler, | 
| 1952   // Second, handle straight character matches. | 2404                  CHARACTER_MATCH, | 
| 1953   int checked_up_to = -1; | 2405                  true, | 
| 1954   for (int i = element_count - 1; i >= 0; i--) { | 2406                  variant, | 
| 1955     TextElement elm = elms_->at(i); | 2407                  false, | 
| 1956     ASSERT(elm.cp_offset >= 0); | 2408                  &bound_checked_to); | 
| 1957     int cp_offset = variant->cp_offset() + elm.cp_offset; | 2409     if (compiler->ignore_case()) { | 
| 1958     if (elm.type == TextElement::ATOM) { | 2410       TextEmitPass(compiler, | 
| 1959       Vector<const uc16> quarks = elm.data.u_atom->data(); | 2411                    CASE_CHARACTER_MATCH, | 
| 1960       int last_cp_offset = cp_offset + quarks.length(); | 2412                    true, | 
| 1961       if (compiler->ignore_case()) { | 2413                    variant, | 
| 1962         EmitAtomNonLetters(macro_assembler, | 2414                    false, | 
| 1963                            elm, | 2415                    &bound_checked_to); | 
| 1964                            quarks, | 2416     } | 
| 1965                            backtrack, | 2417     TextEmitPass(compiler, | 
| 1966                            cp_offset, | 2418                  CHARACTER_CLASS_MATCH, | 
| 1967                            checked_up_to < last_cp_offset); | 2419                  true, | 
| 1968       } else { | 2420                  variant, | 
| 1969         macro_assembler->CheckCharacters(quarks, | 2421                  false, | 
| 1970                                          cp_offset, | 2422                  &bound_checked_to); | 
| 1971                                          backtrack, | 2423     first_elt_done = true; | 
| 1972                                          checked_up_to < last_cp_offset); | 2424   } | 
| 1973       } | 2425 | 
| 1974       if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; | 2426   TextEmitPass(compiler, | 
| 1975     } else { | 2427                CHARACTER_MATCH, | 
| 1976       ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); | 2428                false, | 
| 1977     } | 2429                variant, | 
| 1978   } | 2430                first_elt_done, | 
| 1979   // Third, handle case independent letter matches if any. | 2431                &bound_checked_to); | 
| 1980   if (compiler->ignore_case()) { | 2432   if (compiler->ignore_case()) { | 
| 1981     for (int i = element_count - 1; i >= 0; i--) { | 2433     TextEmitPass(compiler, | 
| 1982       TextElement elm = elms_->at(i); | 2434                  CASE_CHARACTER_MATCH, | 
| 1983       int cp_offset = variant->cp_offset() + elm.cp_offset; | 2435                  false, | 
| 1984       if (elm.type == TextElement::ATOM) { | 2436                  variant, | 
| 1985         Vector<const uc16> quarks = elm.data.u_atom->data(); | 2437                  first_elt_done, | 
| 1986         int last_cp_offset = cp_offset + quarks.length(); | 2438                  &bound_checked_to); | 
| 1987         EmitAtomLetters(macro_assembler, | 2439   } | 
| 1988                         elm, | 2440   TextEmitPass(compiler, | 
| 1989                         quarks, | 2441                CHARACTER_CLASS_MATCH, | 
| 1990                         backtrack, | 2442                false, | 
| 1991                         cp_offset, | 2443                variant, | 
| 1992                         checked_up_to < last_cp_offset); | 2444                first_elt_done, | 
| 1993         if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1; | 2445                &bound_checked_to); | 
| 1994       } | 2446 | 
| 1995     } | 2447   GenerationVariant successor_variant(*variant); | 
| 1996   } | 2448   successor_variant.AdvanceVariant(Length(), compiler->ascii()); | 
| 1997   // If the fast character matches passed then do the character classes. |  | 
| 1998   for (int i = element_count - 1; i >= 0; i--) { |  | 
| 1999     TextElement elm = elms_->at(i); |  | 
| 2000     int cp_offset = variant->cp_offset() + elm.cp_offset; |  | 
| 2001     if (elm.type == TextElement::CHAR_CLASS) { |  | 
| 2002       RegExpCharacterClass* cc = elm.data.u_char_class; |  | 
| 2003       EmitCharClass(macro_assembler, |  | 
| 2004                     cc, |  | 
| 2005                     cp_offset, |  | 
| 2006                     backtrack, |  | 
| 2007                     checked_up_to < cp_offset, |  | 
| 2008                     compiler->ascii()); |  | 
| 2009       if (cp_offset > checked_up_to) checked_up_to = cp_offset; |  | 
| 2010     } |  | 
| 2011   } |  | 
| 2012 |  | 
| 2013   GenerationVariant new_variant(*variant); |  | 
| 2014   new_variant.set_cp_offset(checked_up_to + 1); |  | 
| 2015   RecursionCheck rc(compiler); | 2449   RecursionCheck rc(compiler); | 
| 2016   return on_success()->Emit(compiler, &new_variant); | 2450   return on_success()->Emit(compiler, &successor_variant); | 
|  | 2451 } | 
|  | 2452 | 
|  | 2453 | 
|  | 2454 void GenerationVariant::AdvanceVariant(int by, bool ascii) { | 
|  | 2455   ASSERT(by > 0); | 
|  | 2456   // We don't have an instruction for shifting the current character register | 
|  | 2457   // down or for using a shifted value for anything so lets just forget that | 
|  | 2458   // we preloaded any characters into it. | 
|  | 2459   characters_preloaded_ = 0; | 
|  | 2460   // Adjust the offsets of the quick check performed information.  This | 
|  | 2461   // information is used to find out what we already determined about the | 
|  | 2462   // characters by means of mask and compare. | 
|  | 2463   quick_check_performed_.Advance(by, ascii); | 
|  | 2464   cp_offset_ += by; | 
| 2017 } | 2465 } | 
| 2018 | 2466 | 
| 2019 | 2467 | 
| 2020 void TextNode::MakeCaseIndependent() { | 2468 void TextNode::MakeCaseIndependent() { | 
| 2021   int element_count = elms_->length(); | 2469   int element_count = elms_->length(); | 
| 2022   for (int i = 0; i < element_count; i++) { | 2470   for (int i = 0; i < element_count; i++) { | 
| 2023     TextElement elm = elms_->at(i); | 2471     TextElement elm = elms_->at(i); | 
| 2024     if (elm.type == TextElement::CHAR_CLASS) { | 2472     if (elm.type == TextElement::CHAR_CLASS) { | 
| 2025       RegExpCharacterClass* cc = elm.data.u_char_class; | 2473       RegExpCharacterClass* cc = elm.data.u_char_class; | 
| 2026       ZoneList<CharacterRange>* ranges = cc->ranges(); | 2474       ZoneList<CharacterRange>* ranges = cc->ranges(); | 
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 2103     return true; | 2551     return true; | 
| 2104   } | 2552   } | 
| 2105   ASSERT(variant->stop_node() == NULL); | 2553   ASSERT(variant->stop_node() == NULL); | 
| 2106   if (!variant->is_trivial()) { | 2554   if (!variant->is_trivial()) { | 
| 2107     return variant->Flush(compiler, this); | 2555     return variant->Flush(compiler, this); | 
| 2108   } | 2556   } | 
| 2109   return ChoiceNode::Emit(compiler, variant); | 2557   return ChoiceNode::Emit(compiler, variant); | 
| 2110 } | 2558 } | 
| 2111 | 2559 | 
| 2112 | 2560 | 
|  | 2561 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, | 
|  | 2562                                            int start_from_node) { | 
|  | 2563   bool ascii = compiler->ascii(); | 
|  | 2564   int preload_characters = ChoiceNode::EatsAtLeastHelper(0, start_from_node); | 
|  | 2565 #ifdef CAN_READ_UNALIGNED | 
|  | 2566   if (ascii) { | 
|  | 2567     if (preload_characters > 4) preload_characters = 4; | 
|  | 2568     // We can't preload 3 characters because there is no machine instruction | 
|  | 2569     // to do that.  We can't just load 4 because we could be reading | 
|  | 2570     // beyond the end of the string, which could cause a memory fault. | 
|  | 2571     if (preload_characters == 3) preload_characters = 2; | 
|  | 2572   } else { | 
|  | 2573     if (preload_characters > 2) preload_characters = 2; | 
|  | 2574   } | 
|  | 2575 #else | 
|  | 2576   if (preload_characters > 1) preload_characters = 1; | 
|  | 2577 #endif | 
|  | 2578   return preload_characters; | 
|  | 2579 } | 
|  | 2580 | 
|  | 2581 | 
|  | 2582 // This class is used when generating the alternatives in a choice node.  It | 
|  | 2583 // records the way the alternative is being code generated. | 
|  | 2584 class AlternativeGeneration: public Malloced { | 
|  | 2585  public: | 
|  | 2586   AlternativeGeneration() | 
|  | 2587       : possible_success(), | 
|  | 2588         expects_preload(false), | 
|  | 2589         after(), | 
|  | 2590         quick_check_details() { } | 
|  | 2591   Label possible_success; | 
|  | 2592   bool expects_preload; | 
|  | 2593   Label after; | 
|  | 2594   QuickCheckDetails quick_check_details; | 
|  | 2595 }; | 
|  | 2596 | 
|  | 2597 | 
|  | 2598 // Creates a list of AlternativeGenerations.  If the list has a reasonable | 
|  | 2599 // size then it is on the stack, otherwise the excess is on the heap. | 
|  | 2600 class AlternativeGenerationList { | 
|  | 2601  public: | 
|  | 2602   explicit AlternativeGenerationList(int count) | 
|  | 2603       : alt_gens_(count) { | 
|  | 2604     for (int i = 0; i < count && i < kAFew; i++) { | 
|  | 2605       alt_gens_.Add(a_few_alt_gens_ + i); | 
|  | 2606     } | 
|  | 2607     for (int i = kAFew; i < count; i++) { | 
|  | 2608       alt_gens_.Add(new AlternativeGeneration()); | 
|  | 2609     } | 
|  | 2610   } | 
|  | 2611   ~AlternativeGenerationList() { | 
|  | 2612     for (int i = 0; i < alt_gens_.length(); i++) { | 
|  | 2613       alt_gens_[i]->possible_success.Unuse(); | 
|  | 2614       alt_gens_[i]->after.Unuse(); | 
|  | 2615     } | 
|  | 2616     for (int i = kAFew; i < alt_gens_.length(); i++) { | 
|  | 2617       delete alt_gens_[i]; | 
|  | 2618       alt_gens_[i] = NULL; | 
|  | 2619     } | 
|  | 2620   } | 
|  | 2621 | 
|  | 2622   AlternativeGeneration* at(int i) { | 
|  | 2623     return alt_gens_[i]; | 
|  | 2624   } | 
|  | 2625  private: | 
|  | 2626   static const int kAFew = 10; | 
|  | 2627   ZoneList<AlternativeGeneration*> alt_gens_; | 
|  | 2628   AlternativeGeneration a_few_alt_gens_[kAFew]; | 
|  | 2629 }; | 
|  | 2630 | 
|  | 2631 | 
|  | 2632 /* Code generation for choice nodes. | 
|  | 2633  * | 
|  | 2634  * We generate quick checks that do a mask and compare to eliminate a | 
|  | 2635  * choice.  If the quick check succeeds then it jumps to the continuation to | 
|  | 2636  * do slow checks and check subsequent nodes.  If it fails (the common case) | 
|  | 2637  * it falls through to the next choice. | 
|  | 2638  * | 
|  | 2639  * Here is the desired flow graph.  Nodes directly below each other imply | 
|  | 2640  * fallthrough.  Alternatives 1 and 2 have quick checks.  Alternative | 
|  | 2641  * 3 doesn't have a quick check so we have to call the slow check. | 
|  | 2642  * Nodes are marked Qn for quick checks and Sn for slow checks.  The entire | 
|  | 2643  * regexp continuation is generated directly after the Sn node, up to the | 
|  | 2644  * next GoTo if we decide to reuse some already generated code.  Some | 
|  | 2645  * nodes expect preload_characters to be preloaded into the current | 
|  | 2646  * character register.  R nodes do this preloading.  Vertices are marked | 
|  | 2647  * F for failures and S for success (possible success in the case of quick | 
|  | 2648  * nodes).  L, V, < and > are used as arrow heads. | 
|  | 2649  * | 
|  | 2650  * ----------> R | 
|  | 2651  *             | | 
|  | 2652  *             V | 
|  | 2653  *            Q1 -----> S1 | 
|  | 2654  *             |   S   / | 
|  | 2655  *            F|      / | 
|  | 2656  *             |    F/ | 
|  | 2657  *             |    / | 
|  | 2658  *             |   R | 
|  | 2659  *             |  / | 
|  | 2660  *             V L | 
|  | 2661  *            Q2 -----> S2 | 
|  | 2662  *             |   S   / | 
|  | 2663  *            F|      / | 
|  | 2664  *             |    F/ | 
|  | 2665  *             |    / | 
|  | 2666  *             |   R | 
|  | 2667  *             |  / | 
|  | 2668  *             V L | 
|  | 2669  *            S3 | 
|  | 2670  *             | | 
|  | 2671  *            F| | 
|  | 2672  *             | | 
|  | 2673  *             R | 
|  | 2674  *             | | 
|  | 2675  * backtrack   V | 
|  | 2676  * <----------Q4 | 
|  | 2677  *   \    F    | | 
|  | 2678  *    \        |S | 
|  | 2679  *     \   F   V | 
|  | 2680  *      \-----S4 | 
|  | 2681  * | 
|  | 2682  * For greedy loops we reverse our expectation and expect to match rather | 
|  | 2683  * than fail. Therefore we want the loop code to look like this (U is the | 
|  | 2684  * unwind code that steps back in the greedy loop).  The following alternatives | 
|  | 2685  * look the same as above. | 
|  | 2686  *              _____ | 
|  | 2687  *             /     \ | 
|  | 2688  *             V     | | 
|  | 2689  * ----------> S1    | | 
|  | 2690  *            /|     | | 
|  | 2691  *           / |S    | | 
|  | 2692  *         F/  \_____/ | 
|  | 2693  *         / | 
|  | 2694  *        |<----------- | 
|  | 2695  *        |            \ | 
|  | 2696  *        V             \ | 
|  | 2697  *        Q2 ---> S2     \ | 
|  | 2698  *        |  S   /       | | 
|  | 2699  *       F|     /        | | 
|  | 2700  *        |   F/         | | 
|  | 2701  *        |   /          | | 
|  | 2702  *        |  R           | | 
|  | 2703  *        | /            | | 
|  | 2704  *   F    VL             | | 
|  | 2705  * <------U              | | 
|  | 2706  * back   |S             | | 
|  | 2707  *        \______________/ | 
|  | 2708  */ | 
|  | 2709 | 
|  | 2710 | 
| 2113 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 2711 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 
| 2114   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 2712   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 
| 2115   int choice_count = alternatives_->length(); | 2713   int choice_count = alternatives_->length(); | 
| 2116 #ifdef DEBUG | 2714 #ifdef DEBUG | 
| 2117   for (int i = 0; i < choice_count - 1; i++) { | 2715   for (int i = 0; i < choice_count - 1; i++) { | 
| 2118     GuardedAlternative alternative = alternatives_->at(i); | 2716     GuardedAlternative alternative = alternatives_->at(i); | 
| 2119     ZoneList<Guard*>* guards = alternative.guards(); | 2717     ZoneList<Guard*>* guards = alternative.guards(); | 
| 2120     int guard_count = (guards == NULL) ? 0 : guards->length(); | 2718     int guard_count = (guards == NULL) ? 0 : guards->length(); | 
| 2121     for (int j = 0; j < guard_count; j++) { | 2719     for (int j = 0; j < guard_count; j++) { | 
| 2122       ASSERT(!variant->mentions_reg(guards->at(j)->reg())); | 2720       ASSERT(!variant->mentions_reg(guards->at(j)->reg())); | 
| 2123     } | 2721     } | 
| 2124   } | 2722   } | 
| 2125 #endif | 2723 #endif | 
| 2126 | 2724 | 
| 2127   LimitResult limit_result = LimitVersions(compiler, variant); | 2725   LimitResult limit_result = LimitVersions(compiler, variant); | 
| 2128   if (limit_result == DONE) return true; | 2726   if (limit_result == DONE) return true; | 
| 2129   if (limit_result == FAIL) return false; | 2727   if (limit_result == FAIL) return false; | 
| 2130   ASSERT(limit_result == CONTINUE); | 2728   ASSERT(limit_result == CONTINUE); | 
| 2131 | 2729 | 
| 2132   RecursionCheck rc(compiler); | 2730   RecursionCheck rc(compiler); | 
| 2133 | 2731 | 
| 2134   GenerationVariant* current_variant = variant; | 2732   GenerationVariant* current_variant = variant; | 
| 2135 | 2733 | 
| 2136   int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); | 2734   int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); | 
| 2137   bool greedy_loop = false; | 2735   bool greedy_loop = false; | 
| 2138   Label greedy_loop_label; | 2736   Label greedy_loop_label; | 
| 2139   GenerationVariant counter_backtrack_variant(&greedy_loop_label); | 2737   GenerationVariant counter_backtrack_variant; | 
|  | 2738   counter_backtrack_variant.set_backtrack(&greedy_loop_label); | 
| 2140   if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | 2739   if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { | 
| 2141     // Here we have special handling for greedy loops containing only text nodes | 2740     // Here we have special handling for greedy loops containing only text nodes | 
| 2142     // and other simple nodes.  These are handled by pushing the current | 2741     // and other simple nodes.  These are handled by pushing the current | 
| 2143     // position on the stack and then incrementing the current position each | 2742     // position on the stack and then incrementing the current position each | 
| 2144     // time around the switch.  On backtrack we decrement the current position | 2743     // time around the switch.  On backtrack we decrement the current position | 
| 2145     // and check it against the pushed value.  This avoids pushing backtrack | 2744     // and check it against the pushed value.  This avoids pushing backtrack | 
| 2146     // information for each iteration of the loop, which could take up a lot of | 2745     // information for each iteration of the loop, which could take up a lot of | 
| 2147     // space. | 2746     // space. | 
| 2148     greedy_loop = true; | 2747     greedy_loop = true; | 
| 2149     ASSERT(variant->stop_node() == NULL); | 2748     ASSERT(variant->stop_node() == NULL); | 
| 2150     macro_assembler->PushCurrentPosition(); | 2749     macro_assembler->PushCurrentPosition(); | 
| 2151     current_variant = &counter_backtrack_variant; | 2750     current_variant = &counter_backtrack_variant; | 
| 2152     Label greedy_match_failed; | 2751     Label greedy_match_failed; | 
| 2153     GenerationVariant greedy_match_variant(&greedy_match_failed); | 2752     GenerationVariant greedy_match_variant; | 
|  | 2753     greedy_match_variant.set_backtrack(&greedy_match_failed); | 
| 2154     Label loop_label; | 2754     Label loop_label; | 
| 2155     macro_assembler->Bind(&loop_label); | 2755     macro_assembler->Bind(&loop_label); | 
| 2156     greedy_match_variant.set_stop_node(this); | 2756     greedy_match_variant.set_stop_node(this); | 
| 2157     greedy_match_variant.set_loop_label(&loop_label); | 2757     greedy_match_variant.set_loop_label(&loop_label); | 
| 2158     bool ok = alternatives_->at(0).node()->Emit(compiler, | 2758     bool ok = alternatives_->at(0).node()->Emit(compiler, | 
| 2159                                                 &greedy_match_variant); | 2759                                                 &greedy_match_variant); | 
| 2160     macro_assembler->Bind(&greedy_match_failed); | 2760     macro_assembler->Bind(&greedy_match_failed); | 
| 2161     if (!ok) { | 2761     if (!ok) { | 
| 2162       greedy_loop_label.Unuse(); | 2762       greedy_loop_label.Unuse(); | 
| 2163       return false; | 2763       return false; | 
| 2164     } | 2764     } | 
| 2165   } | 2765   } | 
| 2166 | 2766 | 
| 2167   Label second_choice;  // For use in greedy matches. | 2767   Label second_choice;  // For use in greedy matches. | 
| 2168   macro_assembler->Bind(&second_choice); | 2768   macro_assembler->Bind(&second_choice); | 
| 2169 | 2769 | 
|  | 2770   int first_normal_choice = greedy_loop ? 1 : 0; | 
|  | 2771 | 
|  | 2772   int preload_characters = CalculatePreloadCharacters(compiler, | 
|  | 2773                                                       first_normal_choice); | 
|  | 2774   bool preload_is_current = false; | 
|  | 2775   bool preload_has_checked_bounds = false; | 
|  | 2776 | 
|  | 2777   AlternativeGenerationList alt_gens(choice_count); | 
|  | 2778 | 
| 2170   // For now we just call all choices one after the other.  The idea ultimately | 2779   // For now we just call all choices one after the other.  The idea ultimately | 
| 2171   // is to use the Dispatch table to try only the relevant ones. | 2780   // is to use the Dispatch table to try only the relevant ones. | 
| 2172   for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) { | 2781   for (int i = first_normal_choice; i < choice_count; i++) { | 
| 2173     GuardedAlternative alternative = alternatives_->at(i); | 2782     GuardedAlternative alternative = alternatives_->at(i); | 
| 2174     Label after; | 2783     AlternativeGeneration* alt_gen(alt_gens.at(i)); | 
|  | 2784     alt_gen->quick_check_details.set_characters(preload_characters); | 
| 2175     ZoneList<Guard*>* guards = alternative.guards(); | 2785     ZoneList<Guard*>* guards = alternative.guards(); | 
| 2176     int guard_count = (guards == NULL) ? 0 : guards->length(); | 2786     int guard_count = (guards == NULL) ? 0 : guards->length(); | 
|  | 2787 | 
| 2177     GenerationVariant new_variant(*current_variant); | 2788     GenerationVariant new_variant(*current_variant); | 
| 2178     new_variant.set_backtrack(&after); | 2789     new_variant.set_characters_preloaded(preload_is_current ? | 
| 2179     for (int j = 0; j < guard_count; j++) { | 2790                                          preload_characters : | 
| 2180       GenerateGuard(macro_assembler, guards->at(j), &new_variant); | 2791                                          0); | 
|  | 2792     new_variant.quick_check_performed()->Clear(); | 
|  | 2793     alt_gen->expects_preload = preload_is_current; | 
|  | 2794     bool generate_full_check_inline = false; | 
|  | 2795     if (alternative.node()->EmitQuickCheck(compiler, | 
|  | 2796                                            &new_variant, | 
|  | 2797                                            preload_has_checked_bounds, | 
|  | 2798                                            &alt_gen->possible_success, | 
|  | 2799                                            &alt_gen->quick_check_details, | 
|  | 2800                                            i < choice_count - 1)) { | 
|  | 2801       // Quick check was generated for this choice. | 
|  | 2802       preload_is_current = true; | 
|  | 2803       preload_has_checked_bounds = true; | 
|  | 2804       // On the last choice in the ChoiceNode we generated the quick | 
|  | 2805       // check to fall through on possible success.  So now we need to | 
|  | 2806       // generate the full check inline. | 
|  | 2807       if (i == choice_count - 1) { | 
|  | 2808         macro_assembler->Bind(&alt_gen->possible_success); | 
|  | 2809         new_variant.set_quick_check_performed(&alt_gen->quick_check_details); | 
|  | 2810         generate_full_check_inline = true; | 
|  | 2811       } | 
|  | 2812     } else { | 
|  | 2813       // No quick check was generated.  Put the full code here. | 
|  | 2814       if (i < choice_count - 1) { | 
|  | 2815         new_variant.set_backtrack(&alt_gen->after); | 
|  | 2816       } | 
|  | 2817       generate_full_check_inline = true; | 
| 2181     } | 2818     } | 
| 2182     if (!alternative.node()->Emit(compiler, &new_variant)) { | 2819     if (generate_full_check_inline) { | 
| 2183       after.Unuse(); | 2820       if (preload_is_current) { | 
| 2184       return false; | 2821         new_variant.set_characters_preloaded(preload_characters); | 
|  | 2822       } | 
|  | 2823       for (int j = 0; j < guard_count; j++) { | 
|  | 2824         GenerateGuard(macro_assembler, guards->at(j), &new_variant); | 
|  | 2825       } | 
|  | 2826       if (!alternative.node()->Emit(compiler, &new_variant)) { | 
|  | 2827         greedy_loop_label.Unuse(); | 
|  | 2828         return false; | 
|  | 2829       } | 
|  | 2830       preload_is_current = false; | 
| 2185     } | 2831     } | 
| 2186     macro_assembler->Bind(&after); | 2832     macro_assembler->Bind(&alt_gen->after); | 
| 2187   } | 2833   } | 
| 2188   GuardedAlternative alternative = alternatives_->at(choice_count - 1); |  | 
| 2189   ZoneList<Guard*>* guards = alternative.guards(); |  | 
| 2190   int guard_count = (guards == NULL) ? 0 : guards->length(); |  | 
| 2191   for (int j = 0; j < guard_count; j++) { |  | 
| 2192     GenerateGuard(macro_assembler, guards->at(j), current_variant); |  | 
| 2193   } |  | 
| 2194   bool ok = alternative.node()->Emit(compiler, current_variant); |  | 
| 2195   if (!ok) return false; |  | 
| 2196   if (greedy_loop) { | 2834   if (greedy_loop) { | 
| 2197     macro_assembler->Bind(&greedy_loop_label); | 2835     macro_assembler->Bind(&greedy_loop_label); | 
| 2198     // If we have unwound to the bottom then backtrack. | 2836     // If we have unwound to the bottom then backtrack. | 
| 2199     macro_assembler->CheckGreedyLoop(variant->backtrack()); | 2837     macro_assembler->CheckGreedyLoop(variant->backtrack()); | 
| 2200     // Otherwise try the second priority at an earlier position. | 2838     // Otherwise try the second priority at an earlier position. | 
| 2201     macro_assembler->AdvanceCurrentPosition(-text_length); | 2839     macro_assembler->AdvanceCurrentPosition(-text_length); | 
| 2202     macro_assembler->GoTo(&second_choice); | 2840     macro_assembler->GoTo(&second_choice); | 
| 2203   } | 2841   } | 
|  | 2842   // At this point we need to generate slow checks for the alternatives where | 
|  | 2843   // the quick check was inlined.  We can recognize these because the associated | 
|  | 2844   // label was bound. | 
|  | 2845   for (int i = first_normal_choice; i < choice_count - 1; i++) { | 
|  | 2846     AlternativeGeneration* alt_gen = alt_gens.at(i); | 
|  | 2847     if (!EmitOutOfLineContinuation(compiler, | 
|  | 2848                                    current_variant, | 
|  | 2849                                    alternatives_->at(i), | 
|  | 2850                                    alt_gen, | 
|  | 2851                                    preload_characters, | 
|  | 2852                                    alt_gens.at(i + 1)->expects_preload)) { | 
|  | 2853       return false; | 
|  | 2854     } | 
|  | 2855   } | 
| 2204   return true; | 2856   return true; | 
| 2205 } | 2857 } | 
| 2206 | 2858 | 
| 2207 | 2859 | 
|  | 2860 bool ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, | 
|  | 2861                                            GenerationVariant* variant, | 
|  | 2862                                            GuardedAlternative alternative, | 
|  | 2863                                            AlternativeGeneration* alt_gen, | 
|  | 2864                                            int preload_characters, | 
|  | 2865                                            bool next_expects_preload) { | 
|  | 2866   if (!alt_gen->possible_success.is_linked()) return true; | 
|  | 2867 | 
|  | 2868   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 
|  | 2869   macro_assembler->Bind(&alt_gen->possible_success); | 
|  | 2870   GenerationVariant out_of_line_variant(*variant); | 
|  | 2871   out_of_line_variant.set_characters_preloaded(preload_characters); | 
|  | 2872   out_of_line_variant.set_quick_check_performed(&alt_gen->quick_check_details); | 
|  | 2873   ZoneList<Guard*>* guards = alternative.guards(); | 
|  | 2874   int guard_count = (guards == NULL) ? 0 : guards->length(); | 
|  | 2875   if (next_expects_preload) { | 
|  | 2876     Label reload_current_char; | 
|  | 2877     out_of_line_variant.set_backtrack(&reload_current_char); | 
|  | 2878     for (int j = 0; j < guard_count; j++) { | 
|  | 2879       GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant); | 
|  | 2880     } | 
|  | 2881     bool ok = alternative.node()->Emit(compiler, &out_of_line_variant); | 
|  | 2882     macro_assembler->Bind(&reload_current_char); | 
|  | 2883     // Reload the current character, since the next quick check expects that. | 
|  | 2884     // We don't need to check bounds here because we only get into this | 
|  | 2885     // code through a quick check which already did the checked load. | 
|  | 2886     macro_assembler->LoadCurrentCharacter(variant->cp_offset(), | 
|  | 2887                                           NULL, | 
|  | 2888                                           false, | 
|  | 2889                                           preload_characters); | 
|  | 2890     macro_assembler->GoTo(&(alt_gen->after)); | 
|  | 2891     return ok; | 
|  | 2892   } else { | 
|  | 2893     out_of_line_variant.set_backtrack(&(alt_gen->after)); | 
|  | 2894     for (int j = 0; j < guard_count; j++) { | 
|  | 2895       GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant); | 
|  | 2896     } | 
|  | 2897     return alternative.node()->Emit(compiler, &out_of_line_variant); | 
|  | 2898   } | 
|  | 2899 } | 
|  | 2900 | 
|  | 2901 | 
| 2208 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 2902 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) { | 
| 2209   RegExpMacroAssembler* macro = compiler->macro_assembler(); | 2903   RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 
| 2210   LimitResult limit_result = LimitVersions(compiler, variant); | 2904   LimitResult limit_result = LimitVersions(compiler, variant); | 
| 2211   if (limit_result == DONE) return true; | 2905   if (limit_result == DONE) return true; | 
| 2212   if (limit_result == FAIL) return false; | 2906   if (limit_result == FAIL) return false; | 
| 2213   ASSERT(limit_result == CONTINUE); | 2907   ASSERT(limit_result == CONTINUE); | 
| 2214 | 2908 | 
| 2215   RecursionCheck rc(compiler); | 2909   RecursionCheck rc(compiler); | 
| 2216 | 2910 | 
| 2217   switch (type_) { | 2911   switch (type_) { | 
| 2218     case STORE_POSITION: { | 2912     case STORE_POSITION: { | 
| 2219       GenerationVariant::DeferredCapture | 2913       GenerationVariant::DeferredCapture | 
| (...skipping 11 matching lines...) Expand all  Loading... | 
| 2231     } | 2925     } | 
| 2232     case SET_REGISTER: { | 2926     case SET_REGISTER: { | 
| 2233       GenerationVariant::DeferredSetRegister | 2927       GenerationVariant::DeferredSetRegister | 
| 2234           new_set(data_.u_store_register.reg, data_.u_store_register.value); | 2928           new_set(data_.u_store_register.reg, data_.u_store_register.value); | 
| 2235       GenerationVariant new_variant = *variant; | 2929       GenerationVariant new_variant = *variant; | 
| 2236       new_variant.add_action(&new_set); | 2930       new_variant.add_action(&new_set); | 
| 2237       return on_success()->Emit(compiler, &new_variant); | 2931       return on_success()->Emit(compiler, &new_variant); | 
| 2238     } | 2932     } | 
| 2239     case BEGIN_SUBMATCH: | 2933     case BEGIN_SUBMATCH: | 
| 2240       if (!variant->is_trivial()) return variant->Flush(compiler, this); | 2934       if (!variant->is_trivial()) return variant->Flush(compiler, this); | 
| 2241       macro->WriteCurrentPositionToRegister( | 2935       assembler->WriteCurrentPositionToRegister( | 
| 2242           data_.u_submatch.current_position_register, 0); | 2936           data_.u_submatch.current_position_register, 0); | 
| 2243       macro->WriteStackPointerToRegister( | 2937       assembler->WriteStackPointerToRegister( | 
| 2244           data_.u_submatch.stack_pointer_register); | 2938           data_.u_submatch.stack_pointer_register); | 
| 2245       return on_success()->Emit(compiler, variant); | 2939       return on_success()->Emit(compiler, variant); | 
| 2246     case POSITIVE_SUBMATCH_SUCCESS: | 2940     case POSITIVE_SUBMATCH_SUCCESS: | 
| 2247       if (!variant->is_trivial()) return variant->Flush(compiler, this); | 2941       if (!variant->is_trivial()) return variant->Flush(compiler, this); | 
| 2248       // TODO(erikcorry): Implement support. | 2942       // TODO(erikcorry): Implement support. | 
| 2249       if (info()->follows_word_interest || | 2943       if (info()->follows_word_interest || | 
| 2250           info()->follows_newline_interest || | 2944           info()->follows_newline_interest || | 
| 2251           info()->follows_start_interest) { | 2945           info()->follows_start_interest) { | 
| 2252         return false; | 2946         return false; | 
| 2253       } | 2947       } | 
| 2254       if (info()->at_end) { | 2948       if (info()->at_end) { | 
| 2255         Label at_end; | 2949         Label at_end; | 
| 2256         // Load current character jumps to the label if we are beyond the string | 2950         // Load current character jumps to the label if we are beyond the string | 
| 2257         // end. | 2951         // end. | 
| 2258         macro->LoadCurrentCharacter(0, &at_end); | 2952         assembler->LoadCurrentCharacter(0, &at_end); | 
| 2259         macro->GoTo(variant->backtrack()); | 2953         assembler->GoTo(variant->backtrack()); | 
| 2260         macro->Bind(&at_end); | 2954         assembler->Bind(&at_end); | 
| 2261       } | 2955       } | 
| 2262       macro->ReadCurrentPositionFromRegister( | 2956       assembler->ReadCurrentPositionFromRegister( | 
| 2263           data_.u_submatch.current_position_register); | 2957           data_.u_submatch.current_position_register); | 
| 2264       macro->ReadStackPointerFromRegister( | 2958       assembler->ReadStackPointerFromRegister( | 
| 2265           data_.u_submatch.stack_pointer_register); | 2959           data_.u_submatch.stack_pointer_register); | 
| 2266       return on_success()->Emit(compiler, variant); | 2960       return on_success()->Emit(compiler, variant); | 
| 2267     default: | 2961     default: | 
| 2268       UNREACHABLE(); | 2962       UNREACHABLE(); | 
| 2269       return false; | 2963       return false; | 
| 2270   } | 2964   } | 
| 2271 } | 2965 } | 
| 2272 | 2966 | 
| 2273 | 2967 | 
| 2274 bool BackReferenceNode::Emit(RegExpCompiler* compiler, | 2968 bool BackReferenceNode::Emit(RegExpCompiler* compiler, | 
| 2275                              GenerationVariant* variant) { | 2969                              GenerationVariant* variant) { | 
| 2276   RegExpMacroAssembler* macro = compiler->macro_assembler(); | 2970   RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 
| 2277   if (!variant->is_trivial()) { | 2971   if (!variant->is_trivial()) { | 
| 2278     return variant->Flush(compiler, this); | 2972     return variant->Flush(compiler, this); | 
| 2279   } | 2973   } | 
| 2280 | 2974 | 
| 2281   LimitResult limit_result = LimitVersions(compiler, variant); | 2975   LimitResult limit_result = LimitVersions(compiler, variant); | 
| 2282   if (limit_result == DONE) return true; | 2976   if (limit_result == DONE) return true; | 
| 2283   if (limit_result == FAIL) return false; | 2977   if (limit_result == FAIL) return false; | 
| 2284   ASSERT(limit_result == CONTINUE); | 2978   ASSERT(limit_result == CONTINUE); | 
| 2285 | 2979 | 
| 2286   RecursionCheck rc(compiler); | 2980   RecursionCheck rc(compiler); | 
| 2287 | 2981 | 
| 2288   ASSERT_EQ(start_reg_ + 1, end_reg_); | 2982   ASSERT_EQ(start_reg_ + 1, end_reg_); | 
| 2289   if (info()->at_end) { | 2983   if (info()->at_end) { | 
| 2290     // If we are constrained to match at the end of the input then succeed | 2984     // If we are constrained to match at the end of the input then succeed | 
| 2291     // iff the back reference is empty. | 2985     // iff the back reference is empty. | 
| 2292     macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack()); | 2986     assembler->CheckNotRegistersEqual(start_reg_, | 
|  | 2987                                       end_reg_, | 
|  | 2988                                       variant->backtrack()); | 
| 2293   } else { | 2989   } else { | 
| 2294     if (compiler->ignore_case()) { | 2990     if (compiler->ignore_case()) { | 
| 2295       macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack()); | 2991       assembler->CheckNotBackReferenceIgnoreCase(start_reg_, | 
|  | 2992                                                  variant->backtrack()); | 
| 2296     } else { | 2993     } else { | 
| 2297       macro->CheckNotBackReference(start_reg_, variant->backtrack()); | 2994       assembler->CheckNotBackReference(start_reg_, variant->backtrack()); | 
| 2298     } | 2995     } | 
| 2299   } | 2996   } | 
| 2300   return on_success()->Emit(compiler, variant); | 2997   return on_success()->Emit(compiler, variant); | 
| 2301 } | 2998 } | 
| 2302 | 2999 | 
| 2303 | 3000 | 
| 2304 // ------------------------------------------------------------------- | 3001 // ------------------------------------------------------------------- | 
| 2305 // Dot/dotty output | 3002 // Dot/dotty output | 
| 2306 | 3003 | 
| 2307 | 3004 | 
| (...skipping 1281 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 3589   EmbeddedVector<byte, 1024> codes; | 4286   EmbeddedVector<byte, 1024> codes; | 
| 3590   RegExpMacroAssemblerIrregexp macro_assembler(codes); | 4287   RegExpMacroAssemblerIrregexp macro_assembler(codes); | 
| 3591   return compiler.Assemble(¯o_assembler, | 4288   return compiler.Assemble(¯o_assembler, | 
| 3592                            node, | 4289                            node, | 
| 3593                            data->capture_count, | 4290                            data->capture_count, | 
| 3594                            pattern); | 4291                            pattern); | 
| 3595 } | 4292 } | 
| 3596 | 4293 | 
| 3597 | 4294 | 
| 3598 }}  // namespace v8::internal | 4295 }}  // namespace v8::internal | 
| OLD | NEW | 
|---|