Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(264)

Side by Side Diff: src/jsregexp.cc

Issue 18842: Experimental: periodic merge of the bleeding_edge branch to the... (Closed) Base URL: http://v8.googlecode.com/svn/branches/experimental/toiger/
Patch Set: Created 11 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/macro-assembler-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 280 matching lines...) Expand 10 before | Expand all | Expand 10 after
291 291
292 292
293 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, 293 Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
294 Handle<String> subject, 294 Handle<String> subject,
295 Handle<Object> index) { 295 Handle<Object> index) {
296 switch (regexp->TypeTag()) { 296 switch (regexp->TypeTag()) {
297 case JSRegExp::ATOM: 297 case JSRegExp::ATOM:
298 return AtomExec(regexp, subject, index); 298 return AtomExec(regexp, subject, index);
299 case JSRegExp::IRREGEXP: { 299 case JSRegExp::IRREGEXP: {
300 Handle<Object> result = IrregexpExec(regexp, subject, index); 300 Handle<Object> result = IrregexpExec(regexp, subject, index);
301 if (!result.is_null() || Top::has_pending_exception()) { 301 if (result.is_null()) ASSERT(Top::has_pending_exception());
302 return result; 302 return result;
303 }
304 // We couldn't handle the regexp using Irregexp, so fall back
305 // on JSCRE.
306 // Reset the JSRegExp to use JSCRE.
307 JscrePrepare(regexp,
308 Handle<String>(regexp->Pattern()),
309 regexp->GetFlags());
310 // Fall-through to JSCRE.
311 } 303 }
312 case JSRegExp::JSCRE: 304 case JSRegExp::JSCRE:
313 if (FLAG_disable_jscre) {
314 UNIMPLEMENTED();
315 }
316 return JscreExec(regexp, subject, index); 305 return JscreExec(regexp, subject, index);
317 default: 306 default:
318 UNREACHABLE(); 307 UNREACHABLE();
319 return Handle<Object>::null(); 308 return Handle<Object>::null();
320 } 309 }
321 } 310 }
322 311
323 312
324 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp, 313 Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
325 Handle<String> subject) { 314 Handle<String> subject) {
326 switch (regexp->TypeTag()) { 315 switch (regexp->TypeTag()) {
327 case JSRegExp::ATOM: 316 case JSRegExp::ATOM:
328 return AtomExecGlobal(regexp, subject); 317 return AtomExecGlobal(regexp, subject);
329 case JSRegExp::IRREGEXP: { 318 case JSRegExp::IRREGEXP: {
330 Handle<Object> result = IrregexpExecGlobal(regexp, subject); 319 Handle<Object> result = IrregexpExecGlobal(regexp, subject);
331 if (!result.is_null() || Top::has_pending_exception()) { 320 if (result.is_null()) ASSERT(Top::has_pending_exception());
332 return result; 321 return result;
333 }
334 // Empty handle as result but no exception thrown means that
335 // the regexp contains features not yet handled by the irregexp
336 // compiler.
337 // We have to fall back on JSCRE. Reset the JSRegExp to use JSCRE.
338 JscrePrepare(regexp,
339 Handle<String>(regexp->Pattern()),
340 regexp->GetFlags());
341 // Fall-through to JSCRE.
342 } 322 }
343 case JSRegExp::JSCRE: 323 case JSRegExp::JSCRE:
344 if (FLAG_disable_jscre) {
345 UNIMPLEMENTED();
346 }
347 return JscreExecGlobal(regexp, subject); 324 return JscreExecGlobal(regexp, subject);
348 default: 325 default:
349 UNREACHABLE(); 326 UNREACHABLE();
350 return Handle<Object>::null(); 327 return Handle<Object>::null();
351 } 328 }
352 } 329 }
353 330
354 331
355 // RegExp Atom implementation: Simple string search using indexOf. 332 // RegExp Atom implementation: Simple string search using indexOf.
356 333
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
453 malloc_failure = Failure::Exception(); 430 malloc_failure = Failure::Exception();
454 *code = v8::jscre::jsRegExpCompile(pattern->GetTwoByteData(), 431 *code = v8::jscre::jsRegExpCompile(pattern->GetTwoByteData(),
455 pattern->length(), 432 pattern->length(),
456 case_option, 433 case_option,
457 multiline_option, 434 multiline_option,
458 number_of_captures, 435 number_of_captures,
459 error_message, 436 error_message,
460 &JSREMalloc, 437 &JSREMalloc,
461 &JSREFree); 438 &JSREFree);
462 if (*code == NULL && (malloc_failure->IsRetryAfterGC() || 439 if (*code == NULL && (malloc_failure->IsRetryAfterGC() ||
463 malloc_failure->IsOutOfMemoryFailure())) { 440 malloc_failure->IsOutOfMemoryFailure())) {
464 return malloc_failure; 441 return malloc_failure;
465 } else { 442 } else {
466 // It doesn't matter which object we return here, we just need to return 443 // It doesn't matter which object we return here, we just need to return
467 // a non-failure to indicate to the GC-retry code that there was no 444 // a non-failure to indicate to the GC-retry code that there was no
468 // allocation failure. 445 // allocation failure.
469 return pattern; 446 return pattern;
470 } 447 }
471 } 448 }
472 449
473 450
(...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after
690 } 667 }
691 668
692 // Compile the RegExp. 669 // Compile the RegExp.
693 ZoneScope zone_scope(DELETE_ON_EXIT); 670 ZoneScope zone_scope(DELETE_ON_EXIT);
694 671
695 JSRegExp::Flags flags = re->GetFlags(); 672 JSRegExp::Flags flags = re->GetFlags();
696 673
697 Handle<String> pattern(re->Pattern()); 674 Handle<String> pattern(re->Pattern());
698 StringShape shape(*pattern); 675 StringShape shape(*pattern);
699 if (!pattern->IsFlat(shape)) { 676 if (!pattern->IsFlat(shape)) {
700 pattern->Flatten(shape); 677 FlattenString(pattern);
701 } 678 }
702 679
703 RegExpCompileData compile_data; 680 RegExpCompileData compile_data;
704 FlatStringReader reader(pattern); 681 FlatStringReader reader(pattern);
705 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) { 682 if (!ParseRegExp(&reader, flags.is_multiline(), &compile_data)) {
706 // Throw an exception if we fail to parse the pattern. 683 // Throw an exception if we fail to parse the pattern.
707 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once. 684 // THIS SHOULD NOT HAPPEN. We already parsed it successfully once.
708 ThrowRegExpException(re, 685 ThrowRegExpException(re,
709 pattern, 686 pattern,
710 compile_data.error, 687 compile_data.error,
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
817 int number_of_registers = IrregexpNumberOfRegisters(irregexp); 794 int number_of_registers = IrregexpNumberOfRegisters(irregexp);
818 OffsetsVector offsets(number_of_registers); 795 OffsetsVector offsets(number_of_registers);
819 796
820 int previous_index = 0; 797 int previous_index = 0;
821 798
822 Handle<JSArray> result = Factory::NewJSArray(0); 799 Handle<JSArray> result = Factory::NewJSArray(0);
823 int i = 0; 800 int i = 0;
824 Handle<Object> matches; 801 Handle<Object> matches;
825 802
826 if (!subject->IsFlat(shape)) { 803 if (!subject->IsFlat(shape)) {
827 subject->Flatten(shape); 804 FlattenString(subject);
828 } 805 }
829 806
830 while (true) { 807 while (true) {
831 if (previous_index > subject->length() || previous_index < 0) { 808 if (previous_index > subject->length() || previous_index < 0) {
832 // Per ECMA-262 15.10.6.2, if the previous index is greater than the 809 // Per ECMA-262 15.10.6.2, if the previous index is greater than the
833 // string length, there is no match. 810 // string length, there is no match.
834 matches = Factory::null_value(); 811 matches = Factory::null_value();
835 return result; 812 return result;
836 } else { 813 } else {
837 #ifdef DEBUG 814 #ifdef DEBUG
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
913 address = reinterpret_cast<const byte*>(ext->resource()->data()); 890 address = reinterpret_cast<const byte*>(ext->resource()->data());
914 } 891 }
915 res = RegExpMacroAssemblerIA32::Execute( 892 res = RegExpMacroAssemblerIA32::Execute(
916 *code, 893 *code,
917 const_cast<Address*>(&address), 894 const_cast<Address*>(&address),
918 start_offset << char_size_shift, 895 start_offset << char_size_shift,
919 end_offset << char_size_shift, 896 end_offset << char_size_shift,
920 offsets_vector, 897 offsets_vector,
921 previous_index == 0); 898 previous_index == 0);
922 } else { // Sequential string 899 } else { // Sequential string
900 ASSERT(StringShape(*subject).IsSequential());
923 Address char_address = 901 Address char_address =
924 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() 902 is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
925 : SeqTwoByteString::cast(*subject)->GetCharsAddress(); 903 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
926 int byte_offset = char_address - reinterpret_cast<Address>(*subject); 904 int byte_offset = char_address - reinterpret_cast<Address>(*subject);
927 res = RegExpMacroAssemblerIA32::Execute( 905 res = RegExpMacroAssemblerIA32::Execute(
928 *code, 906 *code,
929 reinterpret_cast<Address*>(subject.location()), 907 reinterpret_cast<Address*>(subject.location()),
930 byte_offset + (start_offset << char_size_shift), 908 byte_offset + (start_offset << char_size_shift),
931 byte_offset + (end_offset << char_size_shift), 909 byte_offset + (end_offset << char_size_shift),
932 offsets_vector, 910 offsets_vector,
(...skipping 257 matching lines...) Expand 10 before | Expand all | Expand 10 after
1190 cons.BuildTable(this); 1168 cons.BuildTable(this);
1191 } 1169 }
1192 return table_; 1170 return table_;
1193 } 1171 }
1194 1172
1195 1173
1196 class RegExpCompiler { 1174 class RegExpCompiler {
1197 public: 1175 public:
1198 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii); 1176 RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii);
1199 1177
1200 int AllocateRegister() { return next_register_++; } 1178 int AllocateRegister() {
1179 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
1180 reg_exp_too_big_ = true;
1181 return next_register_;
1182 }
1183 return next_register_++;
1184 }
1201 1185
1202 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, 1186 Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
1203 RegExpNode* start, 1187 RegExpNode* start,
1204 int capture_count, 1188 int capture_count,
1205 Handle<String> pattern); 1189 Handle<String> pattern);
1206 1190
1207 inline void AddWork(RegExpNode* node) { work_list_->Add(node); } 1191 inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
1208 1192
1209 static const int kImplementationOffset = 0; 1193 static const int kImplementationOffset = 0;
1210 static const int kNumberOfRegistersOffset = 0; 1194 static const int kNumberOfRegistersOffset = 0;
1211 static const int kCodeOffset = 1; 1195 static const int kCodeOffset = 1;
1212 1196
1213 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } 1197 RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
1214 EndNode* accept() { return accept_; } 1198 EndNode* accept() { return accept_; }
1215 1199
1216 static const int kMaxRecursion = 100; 1200 static const int kMaxRecursion = 100;
1217 inline int recursion_depth() { return recursion_depth_; } 1201 inline int recursion_depth() { return recursion_depth_; }
1218 inline void IncrementRecursionDepth() { recursion_depth_++; } 1202 inline void IncrementRecursionDepth() { recursion_depth_++; }
1219 inline void DecrementRecursionDepth() { recursion_depth_--; } 1203 inline void DecrementRecursionDepth() { recursion_depth_--; }
1220 1204
1205 void SetRegExpTooBig() { reg_exp_too_big_ = true; }
1206
1221 inline bool ignore_case() { return ignore_case_; } 1207 inline bool ignore_case() { return ignore_case_; }
1222 inline bool ascii() { return ascii_; } 1208 inline bool ascii() { return ascii_; }
1223 1209
1224 static const int kNoRegister = -1; 1210 static const int kNoRegister = -1;
1225 private: 1211 private:
1226 EndNode* accept_; 1212 EndNode* accept_;
1227 int next_register_; 1213 int next_register_;
1228 List<RegExpNode*>* work_list_; 1214 List<RegExpNode*>* work_list_;
1229 int recursion_depth_; 1215 int recursion_depth_;
1230 RegExpMacroAssembler* macro_assembler_; 1216 RegExpMacroAssembler* macro_assembler_;
1231 bool ignore_case_; 1217 bool ignore_case_;
1232 bool ascii_; 1218 bool ascii_;
1219 bool reg_exp_too_big_;
1233 }; 1220 };
1234 1221
1235 1222
1236 class RecursionCheck { 1223 class RecursionCheck {
1237 public: 1224 public:
1238 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) { 1225 explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
1239 compiler->IncrementRecursionDepth(); 1226 compiler->IncrementRecursionDepth();
1240 } 1227 }
1241 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); } 1228 ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
1242 private: 1229 private:
1243 RegExpCompiler* compiler_; 1230 RegExpCompiler* compiler_;
1244 }; 1231 };
1245 1232
1246 1233
1234 static Handle<FixedArray> IrregexpRegExpTooBig(Handle<String> pattern) {
1235 Handle<JSArray> array = Factory::NewJSArray(2);
1236 SetElement(array, 0, pattern);
1237 const char* message = "RegExp too big";
1238 SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message)));
1239 Handle<Object> regexp_err =
1240 Factory::NewSyntaxError("malformed_regexp", array);
1241 Top::Throw(*regexp_err);
1242 return Handle<FixedArray>();
1243 }
1244
1245
1247 // Attempts to compile the regexp using an Irregexp code generator. Returns 1246 // Attempts to compile the regexp using an Irregexp code generator. Returns
1248 // a fixed array or a null handle depending on whether it succeeded. 1247 // a fixed array or a null handle depending on whether it succeeded.
1249 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) 1248 RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
1250 : next_register_(2 * (capture_count + 1)), 1249 : next_register_(2 * (capture_count + 1)),
1251 work_list_(NULL), 1250 work_list_(NULL),
1252 recursion_depth_(0), 1251 recursion_depth_(0),
1253 ignore_case_(ignore_case), 1252 ignore_case_(ignore_case),
1254 ascii_(ascii) { 1253 ascii_(ascii),
1254 reg_exp_too_big_(false) {
1255 accept_ = new EndNode(EndNode::ACCEPT); 1255 accept_ = new EndNode(EndNode::ACCEPT);
1256 ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
1256 } 1257 }
1257 1258
1258 1259
1259 Handle<FixedArray> RegExpCompiler::Assemble( 1260 Handle<FixedArray> RegExpCompiler::Assemble(
1260 RegExpMacroAssembler* macro_assembler, 1261 RegExpMacroAssembler* macro_assembler,
1261 RegExpNode* start, 1262 RegExpNode* start,
1262 int capture_count, 1263 int capture_count,
1263 Handle<String> pattern) { 1264 Handle<String> pattern) {
1264 #ifdef DEBUG 1265 #ifdef DEBUG
1265 if (FLAG_trace_regexp_assembler) 1266 if (FLAG_trace_regexp_assembler)
1266 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); 1267 macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
1267 else 1268 else
1268 #endif 1269 #endif
1269 macro_assembler_ = macro_assembler; 1270 macro_assembler_ = macro_assembler;
1270 List <RegExpNode*> work_list(0); 1271 List <RegExpNode*> work_list(0);
1271 work_list_ = &work_list; 1272 work_list_ = &work_list;
1272 Label fail; 1273 Label fail;
1273 macro_assembler->PushBacktrack(&fail); 1274 macro_assembler->PushBacktrack(&fail);
1274 Trace new_trace; 1275 Trace new_trace;
1275 if (!start->Emit(this, &new_trace)) { 1276 start->Emit(this, &new_trace);
1276 fail.Unuse();
1277 return Handle<FixedArray>::null();
1278 }
1279 macro_assembler_->Bind(&fail); 1277 macro_assembler_->Bind(&fail);
1280 macro_assembler_->Fail(); 1278 macro_assembler_->Fail();
1281 while (!work_list.is_empty()) { 1279 while (!work_list.is_empty()) {
1282 if (!work_list.RemoveLast()->Emit(this, &new_trace)) { 1280 work_list.RemoveLast()->Emit(this, &new_trace);
1283 return Handle<FixedArray>::null();
1284 }
1285 } 1281 }
1282 if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern);
1286 Handle<FixedArray> array = 1283 Handle<FixedArray> array =
1287 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); 1284 Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
1288 array->set(RegExpImpl::kIrregexpImplementationIndex, 1285 array->set(RegExpImpl::kIrregexpImplementationIndex,
1289 Smi::FromInt(macro_assembler_->Implementation())); 1286 Smi::FromInt(macro_assembler_->Implementation()));
1290 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, 1287 array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex,
1291 Smi::FromInt(next_register_)); 1288 Smi::FromInt(next_register_));
1292 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, 1289 array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
1293 Smi::FromInt(capture_count)); 1290 Smi::FromInt(capture_count));
1294 Handle<Object> code = macro_assembler_->GetCode(pattern); 1291 Handle<Object> code = macro_assembler_->GetCode(pattern);
1295 array->set(RegExpImpl::kIrregexpCodeIndex, *code); 1292 array->set(RegExpImpl::kIrregexpCodeIndex, *code);
1296 work_list_ = NULL; 1293 work_list_ = NULL;
1297 #ifdef DEBUG 1294 #ifdef DEBUG
1298 if (FLAG_trace_regexp_assembler) { 1295 if (FLAG_trace_regexp_assembler) {
1299 delete macro_assembler_; 1296 delete macro_assembler_;
1300 } 1297 }
1301 #endif 1298 #endif
1302 return array; 1299 return array;
1303 } 1300 }
1304 1301
1302
1305 bool Trace::DeferredAction::Mentions(int that) { 1303 bool Trace::DeferredAction::Mentions(int that) {
1306 if (type() == ActionNode::CLEAR_CAPTURES) { 1304 if (type() == ActionNode::CLEAR_CAPTURES) {
1307 Interval range = static_cast<DeferredClearCaptures*>(this)->range(); 1305 Interval range = static_cast<DeferredClearCaptures*>(this)->range();
1308 return range.Contains(that); 1306 return range.Contains(that);
1309 } else { 1307 } else {
1310 return reg() == that; 1308 return reg() == that;
1311 } 1309 }
1312 } 1310 }
1313 1311
1314 1312
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
1353 if (range.to() > max_register) max_register = range.to(); 1351 if (range.to() > max_register) max_register = range.to();
1354 } else { 1352 } else {
1355 affected_registers->Set(action->reg()); 1353 affected_registers->Set(action->reg());
1356 if (action->reg() > max_register) max_register = action->reg(); 1354 if (action->reg() > max_register) max_register = action->reg();
1357 } 1355 }
1358 } 1356 }
1359 return max_register; 1357 return max_register;
1360 } 1358 }
1361 1359
1362 1360
1363 void Trace::PushAffectedRegisters(RegExpMacroAssembler* assembler,
1364 int max_register,
1365 OutSet& affected_registers) {
1366 // Stay safe and check every half times the limit.
1367 // (Round up in case the limit is 1).
1368 int push_limit = (assembler->stack_limit_slack() + 1) / 2;
1369 for (int reg = 0, pushes = 0; reg <= max_register; reg++) {
1370 if (affected_registers.Get(reg)) {
1371 pushes++;
1372 RegExpMacroAssembler::StackCheckFlag check_stack_limit =
1373 (pushes % push_limit) == 0 ?
1374 RegExpMacroAssembler::kCheckStackLimit :
1375 RegExpMacroAssembler::kNoStackLimitCheck;
1376 assembler->PushRegister(reg, check_stack_limit);
1377 }
1378 }
1379 }
1380
1381
1382 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, 1361 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
1383 int max_register, 1362 int max_register,
1384 OutSet& affected_registers) { 1363 OutSet& registers_to_pop,
1364 OutSet& registers_to_clear) {
1385 for (int reg = max_register; reg >= 0; reg--) { 1365 for (int reg = max_register; reg >= 0; reg--) {
1386 if (affected_registers.Get(reg)) assembler->PopRegister(reg); 1366 if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
1367 else if (registers_to_clear.Get(reg)) {
1368 int clear_to = reg;
1369 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
1370 reg--;
1371 }
1372 assembler->ClearRegisters(reg, clear_to);
1373 }
1387 } 1374 }
1388 } 1375 }
1389 1376
1390 1377
1391 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, 1378 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
1392 int max_register, 1379 int max_register,
1393 OutSet& affected_registers) { 1380 OutSet& affected_registers,
1381 OutSet* registers_to_pop,
1382 OutSet* registers_to_clear) {
1383 // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
1384 const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
1385
1394 for (int reg = 0; reg <= max_register; reg++) { 1386 for (int reg = 0; reg <= max_register; reg++) {
1395 if (!affected_registers.Get(reg)) { 1387 if (!affected_registers.Get(reg)) {
1396 continue; 1388 continue;
1397 } 1389 }
1390 // Count pushes performed to force a stack limit check occasionally.
1391 int pushes = 0;
1392
1393 // The chronologically first deferred action in the trace
1394 // is used to infer the action needed to restore a register
1395 // to its previous state (or not, if it's safe to ignore it).
1396 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
1397 DeferredActionUndoType undo_action = IGNORE;
1398
1398 int value = 0; 1399 int value = 0;
1399 bool absolute = false; 1400 bool absolute = false;
1400 bool clear = false; 1401 bool clear = false;
1401 int store_position = -1; 1402 int store_position = -1;
1402 // This is a little tricky because we are scanning the actions in reverse 1403 // This is a little tricky because we are scanning the actions in reverse
1403 // historical order (newest first). 1404 // historical order (newest first).
1404 for (DeferredAction* action = actions_; 1405 for (DeferredAction* action = actions_;
1405 action != NULL; 1406 action != NULL;
1406 action = action->next()) { 1407 action = action->next()) {
1407 if (action->Mentions(reg)) { 1408 if (action->Mentions(reg)) {
1408 switch (action->type()) { 1409 switch (action->type()) {
1409 case ActionNode::SET_REGISTER: { 1410 case ActionNode::SET_REGISTER: {
1410 Trace::DeferredSetRegister* psr = 1411 Trace::DeferredSetRegister* psr =
1411 static_cast<Trace::DeferredSetRegister*>(action); 1412 static_cast<Trace::DeferredSetRegister*>(action);
1412 value += psr->value(); 1413 if (!absolute) {
1413 absolute = true; 1414 value += psr->value();
1415 absolute = true;
1416 }
1417 // SET_REGISTER is currently only used for newly introduced loop
1418 // counters. They can have a significant previous value if they
1419 // occour in a loop. TODO(lrn): Propagate this information, so
1420 // we can set undo_action to IGNORE if we know there is no value to
1421 // restore.
1422 undo_action = RESTORE;
1414 ASSERT_EQ(store_position, -1); 1423 ASSERT_EQ(store_position, -1);
1415 ASSERT(!clear); 1424 ASSERT(!clear);
1416 break; 1425 break;
1417 } 1426 }
1418 case ActionNode::INCREMENT_REGISTER: 1427 case ActionNode::INCREMENT_REGISTER:
1419 if (!absolute) { 1428 if (!absolute) {
1420 value++; 1429 value++;
1421 } 1430 }
1422 ASSERT_EQ(store_position, -1); 1431 ASSERT_EQ(store_position, -1);
1423 ASSERT(!clear); 1432 ASSERT(!clear);
1433 undo_action = RESTORE;
1424 break; 1434 break;
1425 case ActionNode::STORE_POSITION: { 1435 case ActionNode::STORE_POSITION: {
1426 Trace::DeferredCapture* pc = 1436 Trace::DeferredCapture* pc =
1427 static_cast<Trace::DeferredCapture*>(action); 1437 static_cast<Trace::DeferredCapture*>(action);
1428 if (!clear && store_position == -1) { 1438 if (!clear && store_position == -1) {
1429 store_position = pc->cp_offset(); 1439 store_position = pc->cp_offset();
1430 } 1440 }
1441
1442 // For captures we know that stores and clears alternate.
1443 // Other register, are never cleared, and if the occur
1444 // inside a loop, they might be assigned more than once.
1445 if (reg <= 1) {
1446 // Registers zero and one, aka "capture zero", is
1447 // always set correctly if we succeed. There is no
1448 // need to undo a setting on backtrack, because we
1449 // will set it again or fail.
1450 undo_action = IGNORE;
1451 } else {
1452 undo_action = pc->is_capture() ? CLEAR : RESTORE;
1453 }
1431 ASSERT(!absolute); 1454 ASSERT(!absolute);
1432 ASSERT_EQ(value, 0); 1455 ASSERT_EQ(value, 0);
1433 break; 1456 break;
1434 } 1457 }
1435 case ActionNode::CLEAR_CAPTURES: { 1458 case ActionNode::CLEAR_CAPTURES: {
1436 // Since we're scanning in reverse order, if we've already 1459 // Since we're scanning in reverse order, if we've already
1437 // set the position we have to ignore historically earlier 1460 // set the position we have to ignore historically earlier
1438 // clearing operations. 1461 // clearing operations.
1439 if (store_position == -1) 1462 if (store_position == -1) {
1440 clear = true; 1463 clear = true;
1464 }
1465 undo_action = RESTORE;
1441 ASSERT(!absolute); 1466 ASSERT(!absolute);
1442 ASSERT_EQ(value, 0); 1467 ASSERT_EQ(value, 0);
1443 break; 1468 break;
1444 } 1469 }
1445 default: 1470 default:
1446 UNREACHABLE(); 1471 UNREACHABLE();
1447 break; 1472 break;
1448 } 1473 }
1449 } 1474 }
1450 } 1475 }
1476 // Prepare for the undo-action (e.g., push if it's going to be popped).
1477 if (undo_action == RESTORE) {
1478 pushes++;
1479 RegExpMacroAssembler::StackCheckFlag stack_check =
1480 RegExpMacroAssembler::kNoStackLimitCheck;
1481 if (pushes == push_limit) {
1482 stack_check = RegExpMacroAssembler::kCheckStackLimit;
1483 pushes = 0;
1484 }
1485
1486 assembler->PushRegister(reg, stack_check);
1487 registers_to_pop->Set(reg);
1488 } else if (undo_action == CLEAR) {
1489 registers_to_clear->Set(reg);
1490 }
1491 // Perform the chronologically last action (or accumulated increment)
1492 // for the register.
1451 if (store_position != -1) { 1493 if (store_position != -1) {
1452 assembler->WriteCurrentPositionToRegister(reg, store_position); 1494 assembler->WriteCurrentPositionToRegister(reg, store_position);
1453 } else if (clear) { 1495 } else if (clear) {
1454 assembler->ClearRegister(reg); 1496 assembler->ClearRegisters(reg, reg);
1455 } else if (absolute) { 1497 } else if (absolute) {
1456 assembler->SetRegister(reg, value); 1498 assembler->SetRegister(reg, value);
1457 } else if (value != 0) { 1499 } else if (value != 0) {
1458 assembler->AdvanceRegister(reg, value); 1500 assembler->AdvanceRegister(reg, value);
1459 } 1501 }
1460 } 1502 }
1461 } 1503 }
1462 1504
1463 1505
1464 // This is called as we come into a loop choice node and some other tricky 1506 // This is called as we come into a loop choice node and some other tricky
1465 // nodes. It normalises the state of the code generator to ensure we can 1507 // nodes. It normalizes the state of the code generator to ensure we can
1466 // generate generic code. 1508 // generate generic code.
1467 bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) { 1509 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
1468 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1510 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1469 1511
1470 ASSERT(actions_ != NULL || 1512 ASSERT(actions_ != NULL ||
1471 cp_offset_ != 0 || 1513 cp_offset_ != 0 ||
1472 backtrack() != NULL || 1514 backtrack() != NULL ||
1473 characters_preloaded_ != 0 || 1515 characters_preloaded_ != 0 ||
1474 quick_check_performed_.characters() != 0 || 1516 quick_check_performed_.characters() != 0 ||
1475 bound_checked_up_to_ != 0); 1517 bound_checked_up_to_ != 0);
1476 1518
1477 if (actions_ == NULL && backtrack() == NULL) { 1519 if (actions_ == NULL && backtrack() == NULL) {
1478 // Here we just have some deferred cp advances to fix and we are back to 1520 // Here we just have some deferred cp advances to fix and we are back to
1479 // a normal situation. We may also have to forget some information gained 1521 // a normal situation. We may also have to forget some information gained
1480 // through a quick check that was already performed. 1522 // through a quick check that was already performed.
1481 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_); 1523 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
1482 // Create a new trivial state and generate the node with that. 1524 // Create a new trivial state and generate the node with that.
1483 Trace new_state; 1525 Trace new_state;
1484 return successor->Emit(compiler, &new_state); 1526 successor->Emit(compiler, &new_state);
1527 return;
1485 } 1528 }
1486 1529
1487 // Generate deferred actions here along with code to undo them again. 1530 // Generate deferred actions here along with code to undo them again.
1488 OutSet affected_registers; 1531 OutSet affected_registers;
1532
1489 int max_register = FindAffectedRegisters(&affected_registers); 1533 int max_register = FindAffectedRegisters(&affected_registers);
1490 PushAffectedRegisters(assembler, max_register, affected_registers); 1534 OutSet registers_to_pop;
1491 PerformDeferredActions(assembler, max_register, affected_registers); 1535 OutSet registers_to_clear;
1536 PerformDeferredActions(assembler,
1537 max_register,
1538 affected_registers,
1539 &registers_to_pop,
1540 &registers_to_clear);
1492 if (backtrack() != NULL) { 1541 if (backtrack() != NULL) {
1493 // Here we have a concrete backtrack location. These are set up by choice 1542 // Here we have a concrete backtrack location. These are set up by choice
1494 // nodes and so they indicate that we have a deferred save of the current 1543 // nodes and so they indicate that we have a deferred save of the current
1495 // position which we may need to emit here. 1544 // position which we may need to emit here.
1496 assembler->PushCurrentPosition(); 1545 assembler->PushCurrentPosition();
1497 } 1546 }
1498 if (cp_offset_ != 0) { 1547 if (cp_offset_ != 0) {
1499 assembler->AdvanceCurrentPosition(cp_offset_); 1548 assembler->AdvanceCurrentPosition(cp_offset_);
1500 } 1549 }
1501 1550
1502 // Create a new trivial state and generate the node with that. 1551 // Create a new trivial state and generate the node with that.
1503 Label undo; 1552 Label undo;
1504 assembler->PushBacktrack(&undo); 1553 assembler->PushBacktrack(&undo);
1505 Trace new_state; 1554 Trace new_state;
1506 bool ok = successor->Emit(compiler, &new_state); 1555 successor->Emit(compiler, &new_state);
1507 1556
1508 // On backtrack we need to restore state. 1557 // On backtrack we need to restore state.
1509 assembler->Bind(&undo); 1558 assembler->Bind(&undo);
1510 if (!ok) return false;
1511 if (backtrack() != NULL) { 1559 if (backtrack() != NULL) {
1512 assembler->PopCurrentPosition(); 1560 assembler->PopCurrentPosition();
1513 } 1561 }
1514 RestoreAffectedRegisters(assembler, max_register, affected_registers); 1562 RestoreAffectedRegisters(assembler,
1563 max_register,
1564 registers_to_pop,
1565 registers_to_clear);
1515 if (backtrack() == NULL) { 1566 if (backtrack() == NULL) {
1516 assembler->Backtrack(); 1567 assembler->Backtrack();
1517 } else { 1568 } else {
1518 assembler->GoTo(backtrack()); 1569 assembler->GoTo(backtrack());
1519 } 1570 }
1520
1521 return true;
1522 } 1571 }
1523 1572
1524 1573
1525 void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler, Trace* trace) { 1574 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
1526 if (info()->at_end) { 1575 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1527 Label succeed; 1576
1528 // LoadCurrentCharacter will go to the label if we are at the end of the 1577 // Omit flushing the trace. We discard the entire stack frame anyway.
1529 // input string. 1578
1530 assembler->LoadCurrentCharacter(0, &succeed); 1579 if (!label()->is_bound()) {
1531 assembler->GoTo(trace->backtrack()); 1580 // We are completely independent of the trace, since we ignore it,
1532 assembler->Bind(&succeed); 1581 // so this code can be used as the generic version.
1582 assembler->Bind(label());
1533 } 1583 }
1584
1585 // Throw away everything on the backtrack stack since the start
1586 // of the negative submatch and restore the character position.
1587 assembler->ReadCurrentPositionFromRegister(current_position_register_);
1588 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
1589 if (clear_capture_count_ > 0) {
1590 // Clear any captures that might have been performed during the success
1591 // of the body of the negative look-ahead.
1592 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
1593 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
1594 }
1595 // Now that we have unwound the stack we find at the top of the stack the
1596 // backtrack that the BeginSubmatch node got.
1597 assembler->Backtrack();
1534 } 1598 }
1535 1599
1536 1600
1537 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { 1601 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1538 if (!trace->is_trivial()) { 1602 if (!trace->is_trivial()) {
1539 return trace->Flush(compiler, this); 1603 trace->Flush(compiler, this);
1604 return;
1540 } 1605 }
1541 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1606 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1542 if (!label()->is_bound()) { 1607 if (!label()->is_bound()) {
1543 assembler->Bind(label());
1544 }
1545 EmitInfoChecks(assembler, trace);
1546 assembler->ReadCurrentPositionFromRegister(current_position_register_);
1547 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
1548 // Now that we have unwound the stack we find at the top of the stack the
1549 // backtrack that the BeginSubmatch node got.
1550 assembler->Backtrack();
1551 return true;
1552 }
1553
1554
1555 bool EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1556 if (!trace->is_trivial()) {
1557 return trace->Flush(compiler, this);
1558 }
1559 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1560 if (!label()->is_bound()) {
1561 assembler->Bind(label()); 1608 assembler->Bind(label());
1562 } 1609 }
1563 switch (action_) { 1610 switch (action_) {
1564 case ACCEPT: 1611 case ACCEPT:
1565 EmitInfoChecks(assembler, trace);
1566 assembler->Succeed(); 1612 assembler->Succeed();
1567 return true; 1613 return;
1568 case BACKTRACK: 1614 case BACKTRACK:
1569 ASSERT(!info()->at_end);
1570 assembler->GoTo(trace->backtrack()); 1615 assembler->GoTo(trace->backtrack());
1571 return true; 1616 return;
1572 case NEGATIVE_SUBMATCH_SUCCESS: 1617 case NEGATIVE_SUBMATCH_SUCCESS:
1573 // This case is handled in a different virtual method. 1618 // This case is handled in a different virtual method.
1574 UNREACHABLE(); 1619 UNREACHABLE();
1575 } 1620 }
1576 UNIMPLEMENTED(); 1621 UNIMPLEMENTED();
1577 return false;
1578 } 1622 }
1579 1623
1580 1624
1581 void GuardedAlternative::AddGuard(Guard* guard) { 1625 void GuardedAlternative::AddGuard(Guard* guard) {
1582 if (guards_ == NULL) 1626 if (guards_ == NULL)
1583 guards_ = new ZoneList<Guard*>(1); 1627 guards_ = new ZoneList<Guard*>(1);
1584 guards_->Add(guard); 1628 guards_->Add(guard);
1585 } 1629 }
1586 1630
1587 1631
1588 ActionNode* ActionNode::SetRegister(int reg, 1632 ActionNode* ActionNode::SetRegister(int reg,
1589 int val, 1633 int val,
1590 RegExpNode* on_success) { 1634 RegExpNode* on_success) {
1591 ActionNode* result = new ActionNode(SET_REGISTER, on_success); 1635 ActionNode* result = new ActionNode(SET_REGISTER, on_success);
1592 result->data_.u_store_register.reg = reg; 1636 result->data_.u_store_register.reg = reg;
1593 result->data_.u_store_register.value = val; 1637 result->data_.u_store_register.value = val;
1594 return result; 1638 return result;
1595 } 1639 }
1596 1640
1597 1641
1598 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) { 1642 ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
1599 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success); 1643 ActionNode* result = new ActionNode(INCREMENT_REGISTER, on_success);
1600 result->data_.u_increment_register.reg = reg; 1644 result->data_.u_increment_register.reg = reg;
1601 return result; 1645 return result;
1602 } 1646 }
1603 1647
1604 1648
1605 ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) { 1649 ActionNode* ActionNode::StorePosition(int reg,
1650 bool is_capture,
1651 RegExpNode* on_success) {
1606 ActionNode* result = new ActionNode(STORE_POSITION, on_success); 1652 ActionNode* result = new ActionNode(STORE_POSITION, on_success);
1607 result->data_.u_position_register.reg = reg; 1653 result->data_.u_position_register.reg = reg;
1654 result->data_.u_position_register.is_capture = is_capture;
1608 return result; 1655 return result;
1609 } 1656 }
1610 1657
1611 1658
1612 ActionNode* ActionNode::ClearCaptures(Interval range, 1659 ActionNode* ActionNode::ClearCaptures(Interval range,
1613 RegExpNode* on_success) { 1660 RegExpNode* on_success) {
1614 ActionNode* result = new ActionNode(CLEAR_CAPTURES, on_success); 1661 ActionNode* result = new ActionNode(CLEAR_CAPTURES, on_success);
1615 result->data_.u_clear_captures.range_from = range.from(); 1662 result->data_.u_clear_captures.range_from = range.from();
1616 result->data_.u_clear_captures.range_to = range.to(); 1663 result->data_.u_clear_captures.range_to = range.to();
1617 return result; 1664 return result;
1618 } 1665 }
1619 1666
1620 1667
1621 ActionNode* ActionNode::BeginSubmatch(int stack_reg, 1668 ActionNode* ActionNode::BeginSubmatch(int stack_reg,
1622 int position_reg, 1669 int position_reg,
1623 RegExpNode* on_success) { 1670 RegExpNode* on_success) {
1624 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success); 1671 ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);
1625 result->data_.u_submatch.stack_pointer_register = stack_reg; 1672 result->data_.u_submatch.stack_pointer_register = stack_reg;
1626 result->data_.u_submatch.current_position_register = position_reg; 1673 result->data_.u_submatch.current_position_register = position_reg;
1627 return result; 1674 return result;
1628 } 1675 }
1629 1676
1630 1677
1631 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, 1678 ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
1632 int position_reg, 1679 int position_reg,
1680 int clear_register_count,
1681 int clear_register_from,
1633 RegExpNode* on_success) { 1682 RegExpNode* on_success) {
1634 ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success); 1683 ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
1635 result->data_.u_submatch.stack_pointer_register = stack_reg; 1684 result->data_.u_submatch.stack_pointer_register = stack_reg;
1636 result->data_.u_submatch.current_position_register = position_reg; 1685 result->data_.u_submatch.current_position_register = position_reg;
1686 result->data_.u_submatch.clear_register_count = clear_register_count;
1687 result->data_.u_submatch.clear_register_from = clear_register_from;
1637 return result; 1688 return result;
1638 } 1689 }
1639 1690
1640 1691
1641 ActionNode* ActionNode::EmptyMatchCheck(int start_register, 1692 ActionNode* ActionNode::EmptyMatchCheck(int start_register,
1642 int repetition_register, 1693 int repetition_register,
1643 int repetition_limit, 1694 int repetition_limit,
1644 RegExpNode* on_success) { 1695 RegExpNode* on_success) {
1645 ActionNode* result = new ActionNode(EMPTY_MATCH_CHECK, on_success); 1696 ActionNode* result = new ActionNode(EMPTY_MATCH_CHECK, on_success);
1646 result->data_.u_empty_match_check.start_register = start_register; 1697 result->data_.u_empty_match_check.start_register = start_register;
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after
1842 } 1893 }
1843 last_valid_range--; 1894 last_valid_range--;
1844 } 1895 }
1845 1896
1846 if (last_valid_range < 0) { 1897 if (last_valid_range < 0) {
1847 if (!cc->is_negated()) { 1898 if (!cc->is_negated()) {
1848 // TODO(plesner): We can remove this when the node level does our 1899 // TODO(plesner): We can remove this when the node level does our
1849 // ASCII optimizations for us. 1900 // ASCII optimizations for us.
1850 macro_assembler->GoTo(on_failure); 1901 macro_assembler->GoTo(on_failure);
1851 } 1902 }
1903 if (check_offset) {
1904 macro_assembler->CheckPosition(cp_offset, on_failure);
1905 }
1852 return; 1906 return;
1853 } 1907 }
1854 1908
1855 if (last_valid_range == 0 && 1909 if (last_valid_range == 0 &&
1856 !cc->is_negated() && 1910 !cc->is_negated() &&
1857 ranges->at(0).IsEverything(max_char)) { 1911 ranges->at(0).IsEverything(max_char)) {
1858 // This is a common case hit by non-anchored expressions. 1912 // This is a common case hit by non-anchored expressions.
1859 // TODO(erikcorry): We should have a macro assembler instruction that just
1860 // checks for end of string without loading the character.
1861 if (check_offset) { 1913 if (check_offset) {
1862 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); 1914 macro_assembler->CheckPosition(cp_offset, on_failure);
1863 } 1915 }
1864 return; 1916 return;
1865 } 1917 }
1866 1918
1867 if (!preloaded) { 1919 if (!preloaded) {
1868 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); 1920 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
1869 } 1921 }
1870 1922
1871 for (int i = 0; i < last_valid_range; i++) { 1923 for (int i = 0; i < last_valid_range; i++) {
1872 CharacterRange& range = ranges->at(i); 1924 CharacterRange& range = ranges->at(i);
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
1928 macro_assembler->Bind(&success); 1980 macro_assembler->Bind(&success);
1929 } 1981 }
1930 1982
1931 1983
1932 RegExpNode::~RegExpNode() { 1984 RegExpNode::~RegExpNode() {
1933 } 1985 }
1934 1986
1935 1987
1936 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, 1988 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1937 Trace* trace) { 1989 Trace* trace) {
1938 // TODO(erikcorry): Implement support.
1939 if (info_.follows_word_interest ||
1940 info_.follows_newline_interest ||
1941 info_.follows_start_interest) {
1942 return FAIL;
1943 }
1944
1945 // If we are generating a greedy loop then don't stop and don't reuse code. 1990 // If we are generating a greedy loop then don't stop and don't reuse code.
1946 if (trace->stop_node() != NULL) { 1991 if (trace->stop_node() != NULL) {
1947 return CONTINUE; 1992 return CONTINUE;
1948 } 1993 }
1949 1994
1950 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1995 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1951 if (trace->is_trivial()) { 1996 if (trace->is_trivial()) {
1952 if (label_.is_bound()) { 1997 if (label_.is_bound()) {
1953 // We are being asked to generate a generic version, but that's already 1998 // We are being asked to generate a generic version, but that's already
1954 // been done so just go to it. 1999 // been done so just go to it.
(...skipping 16 matching lines...) Expand all
1971 // non-generic versions we generate so as not to overdo it. 2016 // non-generic versions we generate so as not to overdo it.
1972 trace_count_++; 2017 trace_count_++;
1973 if (trace_count_ < kMaxCopiesCodeGenerated && 2018 if (trace_count_ < kMaxCopiesCodeGenerated &&
1974 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) { 2019 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
1975 return CONTINUE; 2020 return CONTINUE;
1976 } 2021 }
1977 2022
1978 // If we get here code has been generated for this node too many times or 2023 // If we get here code has been generated for this node too many times or
1979 // recursion is too deep. Time to switch to a generic version. The code for 2024 // recursion is too deep. Time to switch to a generic version. The code for
1980 // generic versions above can handle deep recursion properly. 2025 // generic versions above can handle deep recursion properly.
1981 bool ok = trace->Flush(compiler, this); 2026 trace->Flush(compiler, this);
1982 return ok ? DONE : FAIL; 2027 return DONE;
1983 } 2028 }
1984 2029
1985 2030
1986 int ActionNode::EatsAtLeast(int recursion_depth) { 2031 int ActionNode::EatsAtLeast(int still_to_find, int recursion_depth) {
1987 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2032 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
1988 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 2033 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
1989 return on_success()->EatsAtLeast(recursion_depth + 1); 2034 return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);
1990 } 2035 }
1991 2036
1992 2037
1993 int TextNode::EatsAtLeast(int recursion_depth) { 2038 int AssertionNode::EatsAtLeast(int still_to_find, int recursion_depth) {
1994 int answer = Length(); 2039 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
1995 if (answer >= 4) return answer; 2040 return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);
1996 if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
1997 return answer + on_success()->EatsAtLeast(recursion_depth + 1);
1998 } 2041 }
1999 2042
2000 2043
2001 int ChoiceNode::EatsAtLeastHelper(int recursion_depth, 2044 int BackReferenceNode::EatsAtLeast(int still_to_find, int recursion_depth) {
2045 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2046 return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1);
2047 }
2048
2049
2050 int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) {
2051 int answer = Length();
2052 if (answer >= still_to_find) return answer;
2053 if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
2054 return answer + on_success()->EatsAtLeast(still_to_find - answer,
2055 recursion_depth + 1);
2056 }
2057
2058
2059 int NegativeLookaheadChoiceNode:: EatsAtLeast(int still_to_find,
2060 int recursion_depth) {
2061 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2062 // Alternative 0 is the negative lookahead, alternative 1 is what comes
2063 // afterwards.
2064 RegExpNode* node = alternatives_->at(1).node();
2065 return node->EatsAtLeast(still_to_find, recursion_depth + 1);
2066 }
2067
2068
2069 void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
2070 QuickCheckDetails* details,
2071 RegExpCompiler* compiler,
2072 int filled_in) {
2073 // Alternative 0 is the negative lookahead, alternative 1 is what comes
2074 // afterwards.
2075 RegExpNode* node = alternatives_->at(1).node();
2076 return node->GetQuickCheckDetails(details, compiler, filled_in);
2077 }
2078
2079
2080 int ChoiceNode::EatsAtLeastHelper(int still_to_find,
2081 int recursion_depth,
2002 RegExpNode* ignore_this_node) { 2082 RegExpNode* ignore_this_node) {
2003 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 2083 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
2004 int min = 100; 2084 int min = 100;
2005 int choice_count = alternatives_->length(); 2085 int choice_count = alternatives_->length();
2006 for (int i = 0; i < choice_count; i++) { 2086 for (int i = 0; i < choice_count; i++) {
2007 RegExpNode* node = alternatives_->at(i).node(); 2087 RegExpNode* node = alternatives_->at(i).node();
2008 if (node == ignore_this_node) continue; 2088 if (node == ignore_this_node) continue;
2009 int node_eats_at_least = node->EatsAtLeast(recursion_depth + 1); 2089 int node_eats_at_least = node->EatsAtLeast(still_to_find,
2090 recursion_depth + 1);
2010 if (node_eats_at_least < min) min = node_eats_at_least; 2091 if (node_eats_at_least < min) min = node_eats_at_least;
2011 } 2092 }
2012 return min; 2093 return min;
2013 } 2094 }
2014 2095
2015 2096
2016 int LoopChoiceNode::EatsAtLeast(int recursion_depth) { 2097 int LoopChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) {
2017 return EatsAtLeastHelper(recursion_depth, loop_node_); 2098 return EatsAtLeastHelper(still_to_find, recursion_depth, loop_node_);
2018 } 2099 }
2019 2100
2020 2101
2021 int ChoiceNode::EatsAtLeast(int recursion_depth) { 2102 int ChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) {
2022 return EatsAtLeastHelper(recursion_depth, NULL); 2103 return EatsAtLeastHelper(still_to_find, recursion_depth, NULL);
2023 } 2104 }
2024 2105
2025 2106
2026 // Takes the left-most 1-bit and smears it out, setting all bits to its right. 2107 // Takes the left-most 1-bit and smears it out, setting all bits to its right.
2027 static inline uint32_t SmearBitsRight(uint32_t v) { 2108 static inline uint32_t SmearBitsRight(uint32_t v) {
2028 v |= v >> 1; 2109 v |= v >> 1;
2029 v |= v >> 2; 2110 v |= v >> 2;
2030 v |= v >> 4; 2111 v |= v >> 4;
2031 v |= v >> 8; 2112 v |= v >> 8;
2032 v |= v >> 16; 2113 v |= v >> 16;
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after
2250 for (int i = 0; i < characters_; i++) { 2331 for (int i = 0; i < characters_; i++) {
2251 positions_[i].mask = 0; 2332 positions_[i].mask = 0;
2252 positions_[i].value = 0; 2333 positions_[i].value = 0;
2253 positions_[i].determines_perfectly = false; 2334 positions_[i].determines_perfectly = false;
2254 } 2335 }
2255 characters_ = 0; 2336 characters_ = 0;
2256 } 2337 }
2257 2338
2258 2339
2259 void QuickCheckDetails::Advance(int by, bool ascii) { 2340 void QuickCheckDetails::Advance(int by, bool ascii) {
2260 ASSERT(by > 0); 2341 ASSERT(by >= 0);
2261 if (by >= characters_) { 2342 if (by >= characters_) {
2262 Clear(); 2343 Clear();
2263 return; 2344 return;
2264 } 2345 }
2265 for (int i = 0; i < characters_ - by; i++) { 2346 for (int i = 0; i < characters_ - by; i++) {
2266 positions_[i] = positions_[by + i]; 2347 positions_[i] = positions_[by + i];
2267 } 2348 }
2268 for (int i = characters_ - by; i < characters_; i++) { 2349 for (int i = characters_ - by; i < characters_; i++) {
2269 positions_[i].mask = 0; 2350 positions_[i].mask = 0;
2270 positions_[i].value = 0; 2351 positions_[i].value = 0;
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
2335 for (int i = 1; i < choice_count; i++) { 2416 for (int i = 1; i < choice_count; i++) {
2336 QuickCheckDetails new_details(details->characters()); 2417 QuickCheckDetails new_details(details->characters());
2337 RegExpNode* node = alternatives_->at(i).node(); 2418 RegExpNode* node = alternatives_->at(i).node();
2338 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in); 2419 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in);
2339 // Here we merge the quick match details of the two branches. 2420 // Here we merge the quick match details of the two branches.
2340 details->Merge(&new_details, characters_filled_in); 2421 details->Merge(&new_details, characters_filled_in);
2341 } 2422 }
2342 } 2423 }
2343 2424
2344 2425
2426 // Check for [0-9A-Z_a-z].
2427 static void EmitWordCheck(RegExpMacroAssembler* assembler,
2428 Label* word,
2429 Label* non_word,
2430 bool fall_through_on_word) {
2431 assembler->CheckCharacterGT('z', non_word);
2432 assembler->CheckCharacterLT('0', non_word);
2433 assembler->CheckCharacterGT('a' - 1, word);
2434 assembler->CheckCharacterLT('9' + 1, word);
2435 assembler->CheckCharacterLT('A', non_word);
2436 assembler->CheckCharacterLT('Z' + 1, word);
2437 if (fall_through_on_word) {
2438 assembler->CheckNotCharacter('_', non_word);
2439 } else {
2440 assembler->CheckCharacter('_', word);
2441 }
2442 }
2443
2444
2445 // Emit the code to check for a ^ in multiline mode (1-character lookbehind
2446 // that matches newline or the start of input).
2447 static void EmitHat(RegExpCompiler* compiler,
2448 RegExpNode* on_success,
2449 Trace* trace) {
2450 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2451 // We will be loading the previous character into the current character
2452 // register.
2453 Trace new_trace(*trace);
2454 new_trace.InvalidateCurrentCharacter();
2455
2456 Label ok;
2457 if (new_trace.cp_offset() == 0) {
2458 // The start of input counts as a newline in this context, so skip to
2459 // ok if we are at the start.
2460 assembler->CheckAtStart(&ok);
2461 }
2462 // We already checked that we are not at the start of input so it must be
2463 // OK to load the previous character.
2464 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
2465 new_trace.backtrack(),
2466 false);
2467 // Newline means \n, \r, 0x2028 or 0x2029.
2468 if (!compiler->ascii()) {
2469 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
2470 }
2471 assembler->CheckCharacter('\n', &ok);
2472 assembler->CheckNotCharacter('\r', new_trace.backtrack());
2473 assembler->Bind(&ok);
2474 on_success->Emit(compiler, &new_trace);
2475 }
2476
2477
2478 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
2479 static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type,
2480 RegExpCompiler* compiler,
2481 RegExpNode* on_success,
2482 Trace* trace) {
2483 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2484 Label before_non_word;
2485 Label before_word;
2486 if (trace->characters_preloaded() != 1) {
2487 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2488 }
2489 // Fall through on non-word.
2490 EmitWordCheck(assembler, &before_word, &before_non_word, false);
2491
2492 // We will be loading the previous character into the current character
2493 // register.
2494 Trace new_trace(*trace);
2495 new_trace.InvalidateCurrentCharacter();
2496
2497 Label ok;
2498 Label* boundary;
2499 Label* not_boundary;
2500 if (type == AssertionNode::AT_BOUNDARY) {
2501 boundary = &ok;
2502 not_boundary = new_trace.backtrack();
2503 } else {
2504 not_boundary = &ok;
2505 boundary = new_trace.backtrack();
2506 }
2507
2508 // Next character is not a word character.
2509 assembler->Bind(&before_non_word);
2510 if (new_trace.cp_offset() == 0) {
2511 // The start of input counts as a non-word character, so the question is
2512 // decided if we are at the start.
2513 assembler->CheckAtStart(not_boundary);
2514 }
2515 // We already checked that we are not at the start of input so it must be
2516 // OK to load the previous character.
2517 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
2518 &ok, // Unused dummy label in this call.
2519 false);
2520 // Fall through on non-word.
2521 EmitWordCheck(assembler, boundary, not_boundary, false);
2522 assembler->GoTo(not_boundary);
2523
2524 // Next character is a word character.
2525 assembler->Bind(&before_word);
2526 if (new_trace.cp_offset() == 0) {
2527 // The start of input counts as a non-word character, so the question is
2528 // decided if we are at the start.
2529 assembler->CheckAtStart(boundary);
2530 }
2531 // We already checked that we are not at the start of input so it must be
2532 // OK to load the previous character.
2533 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
2534 &ok, // Unused dummy label in this call.
2535 false);
2536 bool fall_through_on_word = (type == AssertionNode::AT_NON_BOUNDARY);
2537 EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word);
2538
2539 assembler->Bind(&ok);
2540
2541 on_success->Emit(compiler, &new_trace);
2542 }
2543
2544
2545 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2546 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2547 switch (type_) {
2548 case AT_END: {
2549 Label ok;
2550 assembler->CheckPosition(trace->cp_offset(), &ok);
2551 assembler->GoTo(trace->backtrack());
2552 assembler->Bind(&ok);
2553 break;
2554 }
2555 case AT_START:
2556 assembler->CheckNotAtStart(trace->backtrack());
2557 break;
2558 case AFTER_NEWLINE:
2559 EmitHat(compiler, on_success(), trace);
2560 return;
2561 case AT_NON_BOUNDARY:
2562 case AT_BOUNDARY:
2563 EmitBoundaryCheck(type_, compiler, on_success(), trace);
2564 return;
2565 }
2566 on_success()->Emit(compiler, trace);
2567 }
2568
2569
2345 // We call this repeatedly to generate code for each pass over the text node. 2570 // We call this repeatedly to generate code for each pass over the text node.
2346 // The passes are in increasing order of difficulty because we hope one 2571 // The passes are in increasing order of difficulty because we hope one
2347 // of the first passes will fail in which case we are saved the work of the 2572 // of the first passes will fail in which case we are saved the work of the
2348 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ 2573 // later passes. for example for the case independent regexp /%[asdfghjkl]a/
2349 // we will check the '%' in the first pass, the case independent 'a' in the 2574 // we will check the '%' in the first pass, the case independent 'a' in the
2350 // second pass and the character class in the last pass. 2575 // second pass and the character class in the last pass.
2351 // 2576 //
2352 // The passes are done from right to left, so for example to test for /bar/ 2577 // The passes are done from right to left, so for example to test for /bar/
2353 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 2578 // we will first test for an 'r' with offset 2, then an 'a' with offset 1
2354 // and then a 'b' with offset 0. This means we can avoid the end-of-input 2579 // and then a 'b' with offset 0. This means we can avoid the end-of-input
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
2474 } 2699 }
2475 } 2700 }
2476 2701
2477 2702
2478 // This generates the code to match a text node. A text node can contain 2703 // This generates the code to match a text node. A text node can contain
2479 // straight character sequences (possibly to be matched in a case-independent 2704 // straight character sequences (possibly to be matched in a case-independent
2480 // way) and character classes. For efficiency we do not do this in a single 2705 // way) and character classes. For efficiency we do not do this in a single
2481 // pass from left to right. Instead we pass over the text node several times, 2706 // pass from left to right. Instead we pass over the text node several times,
2482 // emitting code for some character positions every time. See the comment on 2707 // emitting code for some character positions every time. See the comment on
2483 // TextEmitPass for details. 2708 // TextEmitPass for details.
2484 bool TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2709 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2485 LimitResult limit_result = LimitVersions(compiler, trace); 2710 LimitResult limit_result = LimitVersions(compiler, trace);
2486 if (limit_result == FAIL) return false; 2711 if (limit_result == DONE) return;
2487 if (limit_result == DONE) return true;
2488 ASSERT(limit_result == CONTINUE); 2712 ASSERT(limit_result == CONTINUE);
2489 2713
2490 if (info()->follows_word_interest || 2714 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
2491 info()->follows_newline_interest || 2715 compiler->SetRegExpTooBig();
2492 info()->follows_start_interest) { 2716 return;
2493 return false;
2494 }
2495
2496 if (info()->at_end) {
2497 compiler->macro_assembler()->GoTo(trace->backtrack());
2498 return true;
2499 } 2717 }
2500 2718
2501 if (compiler->ascii()) { 2719 if (compiler->ascii()) {
2502 int dummy = 0; 2720 int dummy = 0;
2503 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy); 2721 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
2504 } 2722 }
2505 2723
2506 bool first_elt_done = false; 2724 bool first_elt_done = false;
2507 int bound_checked_to = trace->cp_offset() - 1; 2725 int bound_checked_to = trace->cp_offset() - 1;
2508 bound_checked_to += trace->bound_checked_up_to(); 2726 bound_checked_to += trace->bound_checked_up_to();
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
2548 &bound_checked_to); 2766 &bound_checked_to);
2549 } 2767 }
2550 TextEmitPass(compiler, 2768 TextEmitPass(compiler,
2551 CHARACTER_CLASS_MATCH, 2769 CHARACTER_CLASS_MATCH,
2552 false, 2770 false,
2553 trace, 2771 trace,
2554 first_elt_done, 2772 first_elt_done,
2555 &bound_checked_to); 2773 &bound_checked_to);
2556 2774
2557 Trace successor_trace(*trace); 2775 Trace successor_trace(*trace);
2558 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler->ascii()); 2776 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
2559 RecursionCheck rc(compiler); 2777 RecursionCheck rc(compiler);
2560 return on_success()->Emit(compiler, &successor_trace); 2778 on_success()->Emit(compiler, &successor_trace);
2561 } 2779 }
2562 2780
2563 2781
2564 void Trace::AdvanceCurrentPositionInTrace(int by, bool ascii) { 2782 void Trace::InvalidateCurrentCharacter() {
2783 characters_preloaded_ = 0;
2784 }
2785
2786
2787 void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
2565 ASSERT(by > 0); 2788 ASSERT(by > 0);
2566 // We don't have an instruction for shifting the current character register 2789 // We don't have an instruction for shifting the current character register
2567 // down or for using a shifted value for anything so lets just forget that 2790 // down or for using a shifted value for anything so lets just forget that
2568 // we preloaded any characters into it. 2791 // we preloaded any characters into it.
2569 characters_preloaded_ = 0; 2792 characters_preloaded_ = 0;
2570 // Adjust the offsets of the quick check performed information. This 2793 // Adjust the offsets of the quick check performed information. This
2571 // information is used to find out what we already determined about the 2794 // information is used to find out what we already determined about the
2572 // characters by means of mask and compare. 2795 // characters by means of mask and compare.
2573 quick_check_performed_.Advance(by, ascii); 2796 quick_check_performed_.Advance(by, compiler->ascii());
2574 cp_offset_ += by; 2797 cp_offset_ += by;
2798 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
2799 compiler->SetRegExpTooBig();
2800 cp_offset_ = 0;
2801 }
2575 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); 2802 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
2576 } 2803 }
2577 2804
2578 2805
2579 void TextNode::MakeCaseIndependent() { 2806 void TextNode::MakeCaseIndependent() {
2580 int element_count = elms_->length(); 2807 int element_count = elms_->length();
2581 for (int i = 0; i < element_count; i++) { 2808 for (int i = 0; i < element_count; i++) {
2582 TextElement elm = elms_->at(i); 2809 TextElement elm = elms_->at(i);
2583 if (elm.type == TextElement::CHAR_CLASS) { 2810 if (elm.type == TextElement::CHAR_CLASS) {
2584 RegExpCharacterClass* cc = elm.data.u_char_class; 2811 RegExpCharacterClass* cc = elm.data.u_char_class;
(...skipping 24 matching lines...) Expand all
2609 int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) { 2836 int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) {
2610 int length = 0; 2837 int length = 0;
2611 RegExpNode* node = alternative->node(); 2838 RegExpNode* node = alternative->node();
2612 // Later we will generate code for all these text nodes using recursion 2839 // Later we will generate code for all these text nodes using recursion
2613 // so we have to limit the max number. 2840 // so we have to limit the max number.
2614 int recursion_depth = 0; 2841 int recursion_depth = 0;
2615 while (node != this) { 2842 while (node != this) {
2616 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { 2843 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2617 return kNodeIsTooComplexForGreedyLoops; 2844 return kNodeIsTooComplexForGreedyLoops;
2618 } 2845 }
2619 NodeInfo* info = node->info();
2620 if (info->follows_word_interest ||
2621 info->follows_newline_interest ||
2622 info->follows_start_interest) {
2623 return kNodeIsTooComplexForGreedyLoops;
2624 }
2625 int node_length = node->GreedyLoopTextLength(); 2846 int node_length = node->GreedyLoopTextLength();
2626 if (node_length == kNodeIsTooComplexForGreedyLoops) { 2847 if (node_length == kNodeIsTooComplexForGreedyLoops) {
2627 return kNodeIsTooComplexForGreedyLoops; 2848 return kNodeIsTooComplexForGreedyLoops;
2628 } 2849 }
2629 length += node_length; 2850 length += node_length;
2630 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); 2851 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
2631 node = seq_node->on_success(); 2852 node = seq_node->on_success();
2632 } 2853 }
2633 return length; 2854 return length;
2634 } 2855 }
2635 2856
2636 2857
2637 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) { 2858 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
2638 ASSERT_EQ(loop_node_, NULL); 2859 ASSERT_EQ(loop_node_, NULL);
2639 AddAlternative(alt); 2860 AddAlternative(alt);
2640 loop_node_ = alt.node(); 2861 loop_node_ = alt.node();
2641 } 2862 }
2642 2863
2643 2864
2644 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { 2865 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
2645 ASSERT_EQ(continue_node_, NULL); 2866 ASSERT_EQ(continue_node_, NULL);
2646 AddAlternative(alt); 2867 AddAlternative(alt);
2647 continue_node_ = alt.node(); 2868 continue_node_ = alt.node();
2648 } 2869 }
2649 2870
2650 2871
2651 bool LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2872 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2652 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 2873 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2653 if (trace->stop_node() == this) { 2874 if (trace->stop_node() == this) {
2654 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); 2875 int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
2655 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); 2876 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
2656 // Update the counter-based backtracking info on the stack. This is an 2877 // Update the counter-based backtracking info on the stack. This is an
2657 // optimization for greedy loops (see below). 2878 // optimization for greedy loops (see below).
2658 ASSERT(trace->cp_offset() == text_length); 2879 ASSERT(trace->cp_offset() == text_length);
2659 macro_assembler->AdvanceCurrentPosition(text_length); 2880 macro_assembler->AdvanceCurrentPosition(text_length);
2660 macro_assembler->GoTo(trace->loop_label()); 2881 macro_assembler->GoTo(trace->loop_label());
2661 return true; 2882 return;
2662 } 2883 }
2663 ASSERT(trace->stop_node() == NULL); 2884 ASSERT(trace->stop_node() == NULL);
2664 if (!trace->is_trivial()) { 2885 if (!trace->is_trivial()) {
2665 return trace->Flush(compiler, this); 2886 trace->Flush(compiler, this);
2887 return;
2666 } 2888 }
2667 return ChoiceNode::Emit(compiler, trace); 2889 ChoiceNode::Emit(compiler, trace);
2668 } 2890 }
2669 2891
2670 2892
2671 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler) { 2893 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler) {
2672 int preload_characters = EatsAtLeast(0); 2894 int preload_characters = EatsAtLeast(4, 0);
2673 #ifdef CAN_READ_UNALIGNED 2895 #ifdef CAN_READ_UNALIGNED
2674 bool ascii = compiler->ascii(); 2896 bool ascii = compiler->ascii();
2675 if (ascii) { 2897 if (ascii) {
2676 if (preload_characters > 4) preload_characters = 4; 2898 if (preload_characters > 4) preload_characters = 4;
2677 // We can't preload 3 characters because there is no machine instruction 2899 // We can't preload 3 characters because there is no machine instruction
2678 // to do that. We can't just load 4 because we could be reading 2900 // to do that. We can't just load 4 because we could be reading
2679 // beyond the end of the string, which could cause a memory fault. 2901 // beyond the end of the string, which could cause a memory fault.
2680 if (preload_characters == 3) preload_characters = 2; 2902 if (preload_characters == 3) preload_characters = 2;
2681 } else { 2903 } else {
2682 if (preload_characters > 2) preload_characters = 2; 2904 if (preload_characters > 2) preload_characters = 2;
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after
2810 * | / | 3032 * | / |
2811 * | R | 3033 * | R |
2812 * | / | 3034 * | / |
2813 * F VL | 3035 * F VL |
2814 * <------U | 3036 * <------U |
2815 * back |S | 3037 * back |S |
2816 * \______________/ 3038 * \______________/
2817 */ 3039 */
2818 3040
2819 3041
2820 bool ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3042 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2821 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3043 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2822 int choice_count = alternatives_->length(); 3044 int choice_count = alternatives_->length();
2823 #ifdef DEBUG 3045 #ifdef DEBUG
2824 for (int i = 0; i < choice_count - 1; i++) { 3046 for (int i = 0; i < choice_count - 1; i++) {
2825 GuardedAlternative alternative = alternatives_->at(i); 3047 GuardedAlternative alternative = alternatives_->at(i);
2826 ZoneList<Guard*>* guards = alternative.guards(); 3048 ZoneList<Guard*>* guards = alternative.guards();
2827 int guard_count = (guards == NULL) ? 0 : guards->length(); 3049 int guard_count = (guards == NULL) ? 0 : guards->length();
2828 for (int j = 0; j < guard_count; j++) { 3050 for (int j = 0; j < guard_count; j++) {
2829 ASSERT(!trace->mentions_reg(guards->at(j)->reg())); 3051 ASSERT(!trace->mentions_reg(guards->at(j)->reg()));
2830 } 3052 }
2831 } 3053 }
2832 #endif 3054 #endif
2833 3055
2834 LimitResult limit_result = LimitVersions(compiler, trace); 3056 LimitResult limit_result = LimitVersions(compiler, trace);
2835 if (limit_result == DONE) return true; 3057 if (limit_result == DONE) return;
2836 if (limit_result == FAIL) return false;
2837 ASSERT(limit_result == CONTINUE); 3058 ASSERT(limit_result == CONTINUE);
2838 3059
2839 RecursionCheck rc(compiler); 3060 RecursionCheck rc(compiler);
2840 3061
2841 Trace* current_trace = trace; 3062 Trace* current_trace = trace;
2842 3063
2843 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); 3064 int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
2844 bool greedy_loop = false; 3065 bool greedy_loop = false;
2845 Label greedy_loop_label; 3066 Label greedy_loop_label;
2846 Trace counter_backtrack_trace; 3067 Trace counter_backtrack_trace;
(...skipping 10 matching lines...) Expand all
2857 ASSERT(trace->stop_node() == NULL); 3078 ASSERT(trace->stop_node() == NULL);
2858 macro_assembler->PushCurrentPosition(); 3079 macro_assembler->PushCurrentPosition();
2859 current_trace = &counter_backtrack_trace; 3080 current_trace = &counter_backtrack_trace;
2860 Label greedy_match_failed; 3081 Label greedy_match_failed;
2861 Trace greedy_match_trace; 3082 Trace greedy_match_trace;
2862 greedy_match_trace.set_backtrack(&greedy_match_failed); 3083 greedy_match_trace.set_backtrack(&greedy_match_failed);
2863 Label loop_label; 3084 Label loop_label;
2864 macro_assembler->Bind(&loop_label); 3085 macro_assembler->Bind(&loop_label);
2865 greedy_match_trace.set_stop_node(this); 3086 greedy_match_trace.set_stop_node(this);
2866 greedy_match_trace.set_loop_label(&loop_label); 3087 greedy_match_trace.set_loop_label(&loop_label);
2867 bool ok = alternatives_->at(0).node()->Emit(compiler, 3088 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
2868 &greedy_match_trace);
2869 macro_assembler->Bind(&greedy_match_failed); 3089 macro_assembler->Bind(&greedy_match_failed);
2870 if (!ok) {
2871 greedy_loop_label.Unuse();
2872 return false;
2873 }
2874 } 3090 }
2875 3091
2876 Label second_choice; // For use in greedy matches. 3092 Label second_choice; // For use in greedy matches.
2877 macro_assembler->Bind(&second_choice); 3093 macro_assembler->Bind(&second_choice);
2878 3094
2879 int first_normal_choice = greedy_loop ? 1 : 0; 3095 int first_normal_choice = greedy_loop ? 1 : 0;
2880 3096
2881 int preload_characters = CalculatePreloadCharacters(compiler); 3097 int preload_characters = CalculatePreloadCharacters(compiler);
2882 bool preload_is_current = 3098 bool preload_is_current =
2883 (current_trace->characters_preloaded() == preload_characters); 3099 (current_trace->characters_preloaded() == preload_characters);
(...skipping 12 matching lines...) Expand all
2896 Trace new_trace(*current_trace); 3112 Trace new_trace(*current_trace);
2897 new_trace.set_characters_preloaded(preload_is_current ? 3113 new_trace.set_characters_preloaded(preload_is_current ?
2898 preload_characters : 3114 preload_characters :
2899 0); 3115 0);
2900 if (preload_has_checked_bounds) { 3116 if (preload_has_checked_bounds) {
2901 new_trace.set_bound_checked_up_to(preload_characters); 3117 new_trace.set_bound_checked_up_to(preload_characters);
2902 } 3118 }
2903 new_trace.quick_check_performed()->Clear(); 3119 new_trace.quick_check_performed()->Clear();
2904 alt_gen->expects_preload = preload_is_current; 3120 alt_gen->expects_preload = preload_is_current;
2905 bool generate_full_check_inline = false; 3121 bool generate_full_check_inline = false;
2906 if (alternative.node()->EmitQuickCheck(compiler, 3122 if (try_to_emit_quick_check_for_alternative(i) &&
3123 alternative.node()->EmitQuickCheck(compiler,
2907 &new_trace, 3124 &new_trace,
2908 preload_has_checked_bounds, 3125 preload_has_checked_bounds,
2909 &alt_gen->possible_success, 3126 &alt_gen->possible_success,
2910 &alt_gen->quick_check_details, 3127 &alt_gen->quick_check_details,
2911 i < choice_count - 1)) { 3128 i < choice_count - 1)) {
2912 // Quick check was generated for this choice. 3129 // Quick check was generated for this choice.
2913 preload_is_current = true; 3130 preload_is_current = true;
2914 preload_has_checked_bounds = true; 3131 preload_has_checked_bounds = true;
2915 // On the last choice in the ChoiceNode we generated the quick 3132 // On the last choice in the ChoiceNode we generated the quick
2916 // check to fall through on possible success. So now we need to 3133 // check to fall through on possible success. So now we need to
(...skipping 17 matching lines...) Expand all
2934 } 3151 }
2935 if (i < choice_count - 1) { 3152 if (i < choice_count - 1) {
2936 new_trace.set_backtrack(&alt_gen->after); 3153 new_trace.set_backtrack(&alt_gen->after);
2937 } 3154 }
2938 generate_full_check_inline = true; 3155 generate_full_check_inline = true;
2939 } 3156 }
2940 if (generate_full_check_inline) { 3157 if (generate_full_check_inline) {
2941 for (int j = 0; j < guard_count; j++) { 3158 for (int j = 0; j < guard_count; j++) {
2942 GenerateGuard(macro_assembler, guards->at(j), &new_trace); 3159 GenerateGuard(macro_assembler, guards->at(j), &new_trace);
2943 } 3160 }
2944 if (!alternative.node()->Emit(compiler, &new_trace)) { 3161 alternative.node()->Emit(compiler, &new_trace);
2945 greedy_loop_label.Unuse();
2946 return false;
2947 }
2948 preload_is_current = false; 3162 preload_is_current = false;
2949 } 3163 }
2950 macro_assembler->Bind(&alt_gen->after); 3164 macro_assembler->Bind(&alt_gen->after);
2951 } 3165 }
2952 if (greedy_loop) { 3166 if (greedy_loop) {
2953 macro_assembler->Bind(&greedy_loop_label); 3167 macro_assembler->Bind(&greedy_loop_label);
2954 // If we have unwound to the bottom then backtrack. 3168 // If we have unwound to the bottom then backtrack.
2955 macro_assembler->CheckGreedyLoop(trace->backtrack()); 3169 macro_assembler->CheckGreedyLoop(trace->backtrack());
2956 // Otherwise try the second priority at an earlier position. 3170 // Otherwise try the second priority at an earlier position.
2957 macro_assembler->AdvanceCurrentPosition(-text_length); 3171 macro_assembler->AdvanceCurrentPosition(-text_length);
2958 macro_assembler->GoTo(&second_choice); 3172 macro_assembler->GoTo(&second_choice);
2959 } 3173 }
2960 // At this point we need to generate slow checks for the alternatives where 3174 // At this point we need to generate slow checks for the alternatives where
2961 // the quick check was inlined. We can recognize these because the associated 3175 // the quick check was inlined. We can recognize these because the associated
2962 // label was bound. 3176 // label was bound.
2963 for (int i = first_normal_choice; i < choice_count - 1; i++) { 3177 for (int i = first_normal_choice; i < choice_count - 1; i++) {
2964 AlternativeGeneration* alt_gen = alt_gens.at(i); 3178 AlternativeGeneration* alt_gen = alt_gens.at(i);
2965 if (!EmitOutOfLineContinuation(compiler, 3179 EmitOutOfLineContinuation(compiler,
2966 current_trace, 3180 current_trace,
2967 alternatives_->at(i), 3181 alternatives_->at(i),
2968 alt_gen, 3182 alt_gen,
2969 preload_characters, 3183 preload_characters,
2970 alt_gens.at(i + 1)->expects_preload)) { 3184 alt_gens.at(i + 1)->expects_preload);
2971 return false;
2972 }
2973 } 3185 }
2974 return true;
2975 } 3186 }
2976 3187
2977 3188
2978 bool ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler, 3189 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
2979 Trace* trace, 3190 Trace* trace,
2980 GuardedAlternative alternative, 3191 GuardedAlternative alternative,
2981 AlternativeGeneration* alt_gen, 3192 AlternativeGeneration* alt_gen,
2982 int preload_characters, 3193 int preload_characters,
2983 bool next_expects_preload) { 3194 bool next_expects_preload) {
2984 if (!alt_gen->possible_success.is_linked()) return true; 3195 if (!alt_gen->possible_success.is_linked()) return;
2985 3196
2986 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 3197 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2987 macro_assembler->Bind(&alt_gen->possible_success); 3198 macro_assembler->Bind(&alt_gen->possible_success);
2988 Trace out_of_line_trace(*trace); 3199 Trace out_of_line_trace(*trace);
2989 out_of_line_trace.set_characters_preloaded(preload_characters); 3200 out_of_line_trace.set_characters_preloaded(preload_characters);
2990 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details); 3201 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
2991 ZoneList<Guard*>* guards = alternative.guards(); 3202 ZoneList<Guard*>* guards = alternative.guards();
2992 int guard_count = (guards == NULL) ? 0 : guards->length(); 3203 int guard_count = (guards == NULL) ? 0 : guards->length();
2993 if (next_expects_preload) { 3204 if (next_expects_preload) {
2994 Label reload_current_char; 3205 Label reload_current_char;
2995 out_of_line_trace.set_backtrack(&reload_current_char); 3206 out_of_line_trace.set_backtrack(&reload_current_char);
2996 for (int j = 0; j < guard_count; j++) { 3207 for (int j = 0; j < guard_count; j++) {
2997 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); 3208 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
2998 } 3209 }
2999 bool ok = alternative.node()->Emit(compiler, &out_of_line_trace); 3210 alternative.node()->Emit(compiler, &out_of_line_trace);
3000 macro_assembler->Bind(&reload_current_char); 3211 macro_assembler->Bind(&reload_current_char);
3001 // Reload the current character, since the next quick check expects that. 3212 // Reload the current character, since the next quick check expects that.
3002 // We don't need to check bounds here because we only get into this 3213 // We don't need to check bounds here because we only get into this
3003 // code through a quick check which already did the checked load. 3214 // code through a quick check which already did the checked load.
3004 macro_assembler->LoadCurrentCharacter(trace->cp_offset(), 3215 macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
3005 NULL, 3216 NULL,
3006 false, 3217 false,
3007 preload_characters); 3218 preload_characters);
3008 macro_assembler->GoTo(&(alt_gen->after)); 3219 macro_assembler->GoTo(&(alt_gen->after));
3009 return ok;
3010 } else { 3220 } else {
3011 out_of_line_trace.set_backtrack(&(alt_gen->after)); 3221 out_of_line_trace.set_backtrack(&(alt_gen->after));
3012 for (int j = 0; j < guard_count; j++) { 3222 for (int j = 0; j < guard_count; j++) {
3013 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace); 3223 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
3014 } 3224 }
3015 return alternative.node()->Emit(compiler, &out_of_line_trace); 3225 alternative.node()->Emit(compiler, &out_of_line_trace);
3016 } 3226 }
3017 } 3227 }
3018 3228
3019 3229
3020 bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3230 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3021 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3231 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3022 LimitResult limit_result = LimitVersions(compiler, trace); 3232 LimitResult limit_result = LimitVersions(compiler, trace);
3023 if (limit_result == DONE) return true; 3233 if (limit_result == DONE) return;
3024 if (limit_result == FAIL) return false;
3025 ASSERT(limit_result == CONTINUE); 3234 ASSERT(limit_result == CONTINUE);
3026 3235
3027 RecursionCheck rc(compiler); 3236 RecursionCheck rc(compiler);
3028 3237
3029 switch (type_) { 3238 switch (type_) {
3030 case STORE_POSITION: { 3239 case STORE_POSITION: {
3031 Trace::DeferredCapture 3240 Trace::DeferredCapture
3032 new_capture(data_.u_position_register.reg, trace); 3241 new_capture(data_.u_position_register.reg,
3242 data_.u_position_register.is_capture,
3243 trace);
3033 Trace new_trace = *trace; 3244 Trace new_trace = *trace;
3034 new_trace.add_action(&new_capture); 3245 new_trace.add_action(&new_capture);
3035 return on_success()->Emit(compiler, &new_trace); 3246 on_success()->Emit(compiler, &new_trace);
3247 break;
3036 } 3248 }
3037 case INCREMENT_REGISTER: { 3249 case INCREMENT_REGISTER: {
3038 Trace::DeferredIncrementRegister 3250 Trace::DeferredIncrementRegister
3039 new_increment(data_.u_increment_register.reg); 3251 new_increment(data_.u_increment_register.reg);
3040 Trace new_trace = *trace; 3252 Trace new_trace = *trace;
3041 new_trace.add_action(&new_increment); 3253 new_trace.add_action(&new_increment);
3042 return on_success()->Emit(compiler, &new_trace); 3254 on_success()->Emit(compiler, &new_trace);
3255 break;
3043 } 3256 }
3044 case SET_REGISTER: { 3257 case SET_REGISTER: {
3045 Trace::DeferredSetRegister 3258 Trace::DeferredSetRegister
3046 new_set(data_.u_store_register.reg, data_.u_store_register.value); 3259 new_set(data_.u_store_register.reg, data_.u_store_register.value);
3047 Trace new_trace = *trace; 3260 Trace new_trace = *trace;
3048 new_trace.add_action(&new_set); 3261 new_trace.add_action(&new_set);
3049 return on_success()->Emit(compiler, &new_trace); 3262 on_success()->Emit(compiler, &new_trace);
3263 break;
3050 } 3264 }
3051 case CLEAR_CAPTURES: { 3265 case CLEAR_CAPTURES: {
3052 Trace::DeferredClearCaptures 3266 Trace::DeferredClearCaptures
3053 new_capture(Interval(data_.u_clear_captures.range_from, 3267 new_capture(Interval(data_.u_clear_captures.range_from,
3054 data_.u_clear_captures.range_to)); 3268 data_.u_clear_captures.range_to));
3055 Trace new_trace = *trace; 3269 Trace new_trace = *trace;
3056 new_trace.add_action(&new_capture); 3270 new_trace.add_action(&new_capture);
3057 return on_success()->Emit(compiler, &new_trace); 3271 on_success()->Emit(compiler, &new_trace);
3272 break;
3058 } 3273 }
3059 case BEGIN_SUBMATCH: 3274 case BEGIN_SUBMATCH:
3060 if (!trace->is_trivial()) return trace->Flush(compiler, this); 3275 if (!trace->is_trivial()) {
3061 assembler->WriteCurrentPositionToRegister( 3276 trace->Flush(compiler, this);
3062 data_.u_submatch.current_position_register, 0); 3277 } else {
3063 assembler->WriteStackPointerToRegister( 3278 assembler->WriteCurrentPositionToRegister(
3064 data_.u_submatch.stack_pointer_register); 3279 data_.u_submatch.current_position_register, 0);
3065 return on_success()->Emit(compiler, trace); 3280 assembler->WriteStackPointerToRegister(
3281 data_.u_submatch.stack_pointer_register);
3282 on_success()->Emit(compiler, trace);
3283 }
3284 break;
3066 case EMPTY_MATCH_CHECK: { 3285 case EMPTY_MATCH_CHECK: {
3067 int start_pos_reg = data_.u_empty_match_check.start_register; 3286 int start_pos_reg = data_.u_empty_match_check.start_register;
3068 int stored_pos = 0; 3287 int stored_pos = 0;
3069 int rep_reg = data_.u_empty_match_check.repetition_register; 3288 int rep_reg = data_.u_empty_match_check.repetition_register;
3070 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister); 3289 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
3071 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos); 3290 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
3072 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) { 3291 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
3073 // If we know we haven't advanced and there is no minimum we 3292 // If we know we haven't advanced and there is no minimum we
3074 // can just backtrack immediately. 3293 // can just backtrack immediately.
3075 assembler->GoTo(trace->backtrack()); 3294 assembler->GoTo(trace->backtrack());
3076 return true;
3077 } else if (know_dist && stored_pos < trace->cp_offset()) { 3295 } else if (know_dist && stored_pos < trace->cp_offset()) {
3078 // If we know we've advanced we can generate the continuation 3296 // If we know we've advanced we can generate the continuation
3079 // immediately. 3297 // immediately.
3080 return on_success()->Emit(compiler, trace); 3298 on_success()->Emit(compiler, trace);
3299 } else if (!trace->is_trivial()) {
3300 trace->Flush(compiler, this);
3301 } else {
3302 Label skip_empty_check;
3303 // If we have a minimum number of repetitions we check the current
3304 // number first and skip the empty check if it's not enough.
3305 if (has_minimum) {
3306 int limit = data_.u_empty_match_check.repetition_limit;
3307 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
3308 }
3309 // If the match is empty we bail out, otherwise we fall through
3310 // to the on-success continuation.
3311 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
3312 trace->backtrack());
3313 assembler->Bind(&skip_empty_check);
3314 on_success()->Emit(compiler, trace);
3081 } 3315 }
3082 if (!trace->is_trivial()) return trace->Flush(compiler, this); 3316 break;
3083 Label skip_empty_check;
3084 // If we have a minimum number of repetitions we check the current
3085 // number first and skip the empty check if it's not enough.
3086 if (has_minimum) {
3087 int limit = data_.u_empty_match_check.repetition_limit;
3088 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
3089 }
3090 // If the match is empty we bail out, otherwise we fall through
3091 // to the on-success continuation.
3092 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
3093 trace->backtrack());
3094 assembler->Bind(&skip_empty_check);
3095 return on_success()->Emit(compiler, trace);
3096 } 3317 }
3097 case POSITIVE_SUBMATCH_SUCCESS: 3318 case POSITIVE_SUBMATCH_SUCCESS: {
3098 if (!trace->is_trivial()) return trace->Flush(compiler, this); 3319 if (!trace->is_trivial()) {
3099 // TODO(erikcorry): Implement support. 3320 trace->Flush(compiler, this);
3100 if (info()->follows_word_interest || 3321 return;
3101 info()->follows_newline_interest ||
3102 info()->follows_start_interest) {
3103 return false;
3104 }
3105 if (info()->at_end) {
3106 Label at_end;
3107 // Load current character jumps to the label if we are beyond the string
3108 // end.
3109 assembler->LoadCurrentCharacter(0, &at_end);
3110 assembler->GoTo(trace->backtrack());
3111 assembler->Bind(&at_end);
3112 } 3322 }
3113 assembler->ReadCurrentPositionFromRegister( 3323 assembler->ReadCurrentPositionFromRegister(
3114 data_.u_submatch.current_position_register); 3324 data_.u_submatch.current_position_register);
3115 assembler->ReadStackPointerFromRegister( 3325 assembler->ReadStackPointerFromRegister(
3116 data_.u_submatch.stack_pointer_register); 3326 data_.u_submatch.stack_pointer_register);
3117 return on_success()->Emit(compiler, trace); 3327 int clear_register_count = data_.u_submatch.clear_register_count;
3328 if (clear_register_count == 0) {
3329 on_success()->Emit(compiler, trace);
3330 return;
3331 }
3332 int clear_registers_from = data_.u_submatch.clear_register_from;
3333 Label clear_registers_backtrack;
3334 Trace new_trace = *trace;
3335 new_trace.set_backtrack(&clear_registers_backtrack);
3336 on_success()->Emit(compiler, &new_trace);
3337
3338 assembler->Bind(&clear_registers_backtrack);
3339 int clear_registers_to = clear_registers_from + clear_register_count - 1;
3340 assembler->ClearRegisters(clear_registers_from, clear_registers_to);
3341
3342 ASSERT(trace->backtrack() == NULL);
3343 assembler->Backtrack();
3344 return;
3345 }
3118 default: 3346 default:
3119 UNREACHABLE(); 3347 UNREACHABLE();
3120 return false;
3121 } 3348 }
3122 } 3349 }
3123 3350
3124 3351
3125 bool BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3352 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3126 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3353 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3127 if (!trace->is_trivial()) { 3354 if (!trace->is_trivial()) {
3128 return trace->Flush(compiler, this); 3355 trace->Flush(compiler, this);
3356 return;
3129 } 3357 }
3130 3358
3131 LimitResult limit_result = LimitVersions(compiler, trace); 3359 LimitResult limit_result = LimitVersions(compiler, trace);
3132 if (limit_result == DONE) return true; 3360 if (limit_result == DONE) return;
3133 if (limit_result == FAIL) return false;
3134 ASSERT(limit_result == CONTINUE); 3361 ASSERT(limit_result == CONTINUE);
3135 3362
3136 RecursionCheck rc(compiler); 3363 RecursionCheck rc(compiler);
3137 3364
3138 ASSERT_EQ(start_reg_ + 1, end_reg_); 3365 ASSERT_EQ(start_reg_ + 1, end_reg_);
3139 if (info()->at_end) { 3366 if (compiler->ignore_case()) {
3140 // If we are constrained to match at the end of the input then succeed 3367 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
3141 // iff the back reference is empty. 3368 trace->backtrack());
3142 assembler->CheckNotRegistersEqual(start_reg_,
3143 end_reg_,
3144 trace->backtrack());
3145 } else { 3369 } else {
3146 if (compiler->ignore_case()) { 3370 assembler->CheckNotBackReference(start_reg_, trace->backtrack());
3147 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
3148 trace->backtrack());
3149 } else {
3150 assembler->CheckNotBackReference(start_reg_, trace->backtrack());
3151 }
3152 } 3371 }
3153 return on_success()->Emit(compiler, trace); 3372 on_success()->Emit(compiler, trace);
3154 } 3373 }
3155 3374
3156 3375
3157 // ------------------------------------------------------------------- 3376 // -------------------------------------------------------------------
3158 // Dot/dotty output 3377 // Dot/dotty output
3159 3378
3160 3379
3161 #ifdef DEBUG 3380 #ifdef DEBUG
3162 3381
3163 3382
(...skipping 218 matching lines...) Expand 10 before | Expand all | Expand 10 after
3382 Visit(that->on_success()); 3601 Visit(that->on_success());
3383 } 3602 }
3384 3603
3385 3604
3386 void DotPrinter::VisitEnd(EndNode* that) { 3605 void DotPrinter::VisitEnd(EndNode* that) {
3387 stream()->Add(" n%p [style=bold, shape=point];\n", that); 3606 stream()->Add(" n%p [style=bold, shape=point];\n", that);
3388 PrintAttributes(that); 3607 PrintAttributes(that);
3389 } 3608 }
3390 3609
3391 3610
3611 void DotPrinter::VisitAssertion(AssertionNode* that) {
3612 stream()->Add(" n%p [", that);
3613 switch (that->type()) {
3614 case AssertionNode::AT_END:
3615 stream()->Add("label=\"$\", shape=septagon");
3616 break;
3617 case AssertionNode::AT_START:
3618 stream()->Add("label=\"^\", shape=septagon");
3619 break;
3620 case AssertionNode::AT_BOUNDARY:
3621 stream()->Add("label=\"\\b\", shape=septagon");
3622 break;
3623 case AssertionNode::AT_NON_BOUNDARY:
3624 stream()->Add("label=\"\\B\", shape=septagon");
3625 break;
3626 case AssertionNode::AFTER_NEWLINE:
3627 stream()->Add("label=\"(?<=\\n)\", shape=septagon");
3628 break;
3629 }
3630 stream()->Add("];\n");
3631 PrintAttributes(that);
3632 RegExpNode* successor = that->on_success();
3633 stream()->Add(" n%p -> n%p;\n", that, successor);
3634 Visit(successor);
3635 }
3636
3637
3392 void DotPrinter::VisitAction(ActionNode* that) { 3638 void DotPrinter::VisitAction(ActionNode* that) {
3393 stream()->Add(" n%p [", that); 3639 stream()->Add(" n%p [", that);
3394 switch (that->type_) { 3640 switch (that->type_) {
3395 case ActionNode::SET_REGISTER: 3641 case ActionNode::SET_REGISTER:
3396 stream()->Add("label=\"$%i:=%i\", shape=octagon", 3642 stream()->Add("label=\"$%i:=%i\", shape=octagon",
3397 that->data_.u_store_register.reg, 3643 that->data_.u_store_register.reg,
3398 that->data_.u_store_register.value); 3644 that->data_.u_store_register.value);
3399 break; 3645 break;
3400 case ActionNode::INCREMENT_REGISTER: 3646 case ActionNode::INCREMENT_REGISTER:
3401 stream()->Add("label=\"$%i++\", shape=octagon", 3647 stream()->Add("label=\"$%i++\", shape=octagon",
(...skipping 304 matching lines...) Expand 10 before | Expand all | Expand 10 after
3706 // backtrack. 3952 // backtrack.
3707 loop_return = ActionNode::EmptyMatchCheck(body_start_reg, 3953 loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
3708 reg_ctr, 3954 reg_ctr,
3709 min, 3955 min,
3710 loop_return); 3956 loop_return);
3711 } 3957 }
3712 RegExpNode* body_node = body->ToNode(compiler, loop_return); 3958 RegExpNode* body_node = body->ToNode(compiler, loop_return);
3713 if (body_can_be_empty) { 3959 if (body_can_be_empty) {
3714 // If the body can be empty we need to store the start position 3960 // If the body can be empty we need to store the start position
3715 // so we can bail out if it was empty. 3961 // so we can bail out if it was empty.
3716 body_node = ActionNode::StorePosition(body_start_reg, body_node); 3962 body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
3717 } 3963 }
3718 if (needs_capture_clearing) { 3964 if (needs_capture_clearing) {
3719 // Before entering the body of this loop we need to clear captures. 3965 // Before entering the body of this loop we need to clear captures.
3720 body_node = ActionNode::ClearCaptures(capture_registers, body_node); 3966 body_node = ActionNode::ClearCaptures(capture_registers, body_node);
3721 } 3967 }
3722 GuardedAlternative body_alt(body_node); 3968 GuardedAlternative body_alt(body_node);
3723 if (has_max) { 3969 if (has_max) {
3724 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); 3970 Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
3725 body_alt.AddGuard(body_guard); 3971 body_alt.AddGuard(body_guard);
3726 } 3972 }
(...skipping 15 matching lines...) Expand all
3742 return center; 3988 return center;
3743 } 3989 }
3744 } 3990 }
3745 3991
3746 3992
3747 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, 3993 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
3748 RegExpNode* on_success) { 3994 RegExpNode* on_success) {
3749 NodeInfo info; 3995 NodeInfo info;
3750 switch (type()) { 3996 switch (type()) {
3751 case START_OF_LINE: 3997 case START_OF_LINE:
3752 info.follows_newline_interest = true; 3998 return AssertionNode::AfterNewline(on_success);
3753 break;
3754 case START_OF_INPUT: 3999 case START_OF_INPUT:
3755 info.follows_start_interest = true; 4000 return AssertionNode::AtStart(on_success);
3756 break; 4001 case BOUNDARY:
3757 case BOUNDARY: case NON_BOUNDARY: 4002 return AssertionNode::AtBoundary(on_success);
3758 info.follows_word_interest = true; 4003 case NON_BOUNDARY:
3759 break; 4004 return AssertionNode::AtNonBoundary(on_success);
3760 case END_OF_INPUT: 4005 case END_OF_INPUT:
3761 info.at_end = true; 4006 return AssertionNode::AtEnd(on_success);
3762 break; 4007 case END_OF_LINE: {
3763 case END_OF_LINE: 4008 // Compile $ in multiline regexps as an alternation with a positive
3764 // This is wrong but has the effect of making the compiler abort. 4009 // lookahead in one side and an end-of-input on the other side.
3765 info.at_end = true; 4010 // We need two registers for the lookahead.
4011 int stack_pointer_register = compiler->AllocateRegister();
4012 int position_register = compiler->AllocateRegister();
4013 // The ChoiceNode to distinguish between a newline and end-of-input.
4014 ChoiceNode* result = new ChoiceNode(2);
4015 // Create a newline atom.
4016 ZoneList<CharacterRange>* newline_ranges =
4017 new ZoneList<CharacterRange>(3);
4018 CharacterRange::AddClassEscape('n', newline_ranges);
4019 RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n');
4020 TextNode* newline_matcher = new TextNode(
4021 newline_atom,
4022 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
4023 position_register,
4024 0, // No captures inside.
4025 -1, // Ignored if no captures.
4026 on_success));
4027 // Create an end-of-input matcher.
4028 RegExpNode* end_of_line = ActionNode::BeginSubmatch(
4029 stack_pointer_register,
4030 position_register,
4031 newline_matcher);
4032 // Add the two alternatives to the ChoiceNode.
4033 GuardedAlternative eol_alternative(end_of_line);
4034 result->AddAlternative(eol_alternative);
4035 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
4036 result->AddAlternative(end_alternative);
4037 return result;
4038 }
4039 default:
4040 UNREACHABLE();
3766 } 4041 }
3767 return on_success->PropagateForward(&info); 4042 return on_success;
3768 } 4043 }
3769 4044
3770 4045
3771 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, 4046 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
3772 RegExpNode* on_success) { 4047 RegExpNode* on_success) {
3773 return new BackReferenceNode(RegExpCapture::StartRegister(index()), 4048 return new BackReferenceNode(RegExpCapture::StartRegister(index()),
3774 RegExpCapture::EndRegister(index()), 4049 RegExpCapture::EndRegister(index()),
3775 on_success); 4050 on_success);
3776 } 4051 }
3777 4052
3778 4053
3779 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler, 4054 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
3780 RegExpNode* on_success) { 4055 RegExpNode* on_success) {
3781 return on_success; 4056 return on_success;
3782 } 4057 }
3783 4058
3784 4059
3785 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler, 4060 RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
3786 RegExpNode* on_success) { 4061 RegExpNode* on_success) {
3787 int stack_pointer_register = compiler->AllocateRegister(); 4062 int stack_pointer_register = compiler->AllocateRegister();
3788 int position_register = compiler->AllocateRegister(); 4063 int position_register = compiler->AllocateRegister();
4064
4065 const int registers_per_capture = 2;
4066 const int register_of_first_capture = 2;
4067 int register_count = capture_count_ * registers_per_capture;
4068 int register_start =
4069 register_of_first_capture + capture_from_ * registers_per_capture;
4070
3789 RegExpNode* success; 4071 RegExpNode* success;
3790 if (is_positive()) { 4072 if (is_positive()) {
3791 return ActionNode::BeginSubmatch( 4073 RegExpNode* node = ActionNode::BeginSubmatch(
3792 stack_pointer_register, 4074 stack_pointer_register,
3793 position_register, 4075 position_register,
3794 body()->ToNode( 4076 body()->ToNode(
3795 compiler, 4077 compiler,
3796 ActionNode::PositiveSubmatchSuccess(stack_pointer_register, 4078 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
3797 position_register, 4079 position_register,
4080 register_count,
4081 register_start,
3798 on_success))); 4082 on_success)));
4083 return node;
3799 } else { 4084 } else {
3800 // We use a ChoiceNode for a negative lookahead because it has most of 4085 // We use a ChoiceNode for a negative lookahead because it has most of
3801 // the characteristics we need. It has the body of the lookahead as its 4086 // the characteristics we need. It has the body of the lookahead as its
3802 // first alternative and the expression after the lookahead of the second 4087 // first alternative and the expression after the lookahead of the second
3803 // alternative. If the first alternative succeeds then the 4088 // alternative. If the first alternative succeeds then the
3804 // NegativeSubmatchSuccess will unwind the stack including everything the 4089 // NegativeSubmatchSuccess will unwind the stack including everything the
3805 // choice node set up and backtrack. If the first alternative fails then 4090 // choice node set up and backtrack. If the first alternative fails then
3806 // the second alternative is tried, which is exactly the desired result 4091 // the second alternative is tried, which is exactly the desired result
3807 // for a negative lookahead. In the case where the dispatch table 4092 // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
3808 // determines that the first alternative cannot match we will save time 4093 // ChoiceNode that knows to ignore the first exit when calculating quick
3809 // by not trying it. Things are not quite so well-optimized if the 4094 // checks.
3810 // dispatch table determines that the second alternative cannot match.
3811 // In this case we could optimize by immediately backtracking.
3812 ChoiceNode* choice_node = new ChoiceNode(2);
3813 GuardedAlternative body_alt( 4095 GuardedAlternative body_alt(
3814 body()->ToNode( 4096 body()->ToNode(
3815 compiler, 4097 compiler,
3816 success = new NegativeSubmatchSuccess(stack_pointer_register, 4098 success = new NegativeSubmatchSuccess(stack_pointer_register,
3817 position_register))); 4099 position_register,
3818 choice_node->AddAlternative(body_alt); 4100 register_count,
3819 choice_node->AddAlternative(GuardedAlternative(on_success)); 4101 register_start)));
4102 ChoiceNode* choice_node =
4103 new NegativeLookaheadChoiceNode(body_alt,
4104 GuardedAlternative(on_success));
3820 return ActionNode::BeginSubmatch(stack_pointer_register, 4105 return ActionNode::BeginSubmatch(stack_pointer_register,
3821 position_register, 4106 position_register,
3822 choice_node); 4107 choice_node);
3823 } 4108 }
3824 } 4109 }
3825 4110
3826 4111
3827 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler, 4112 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
3828 RegExpNode* on_success) { 4113 RegExpNode* on_success) {
3829 return ToNode(body(), index(), compiler, on_success); 4114 return ToNode(body(), index(), compiler, on_success);
3830 } 4115 }
3831 4116
3832 4117
3833 RegExpNode* RegExpCapture::ToNode(RegExpTree* body, 4118 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
3834 int index, 4119 int index,
3835 RegExpCompiler* compiler, 4120 RegExpCompiler* compiler,
3836 RegExpNode* on_success) { 4121 RegExpNode* on_success) {
3837 int start_reg = RegExpCapture::StartRegister(index); 4122 int start_reg = RegExpCapture::StartRegister(index);
3838 int end_reg = RegExpCapture::EndRegister(index); 4123 int end_reg = RegExpCapture::EndRegister(index);
3839 RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success); 4124 RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
3840 RegExpNode* body_node = body->ToNode(compiler, store_end); 4125 RegExpNode* body_node = body->ToNode(compiler, store_end);
3841 return ActionNode::StorePosition(start_reg, body_node); 4126 return ActionNode::StorePosition(start_reg, true, body_node);
3842 } 4127 }
3843 4128
3844 4129
3845 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, 4130 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
3846 RegExpNode* on_success) { 4131 RegExpNode* on_success) {
3847 ZoneList<RegExpTree*>* children = nodes(); 4132 ZoneList<RegExpTree*>* children = nodes();
3848 RegExpNode* current = on_success; 4133 RegExpNode* current = on_success;
3849 for (int i = children->length() - 1; i >= 0; i--) { 4134 for (int i = children->length() - 1; i >= 0; i--) {
3850 current = children->at(i)->ToNode(compiler, current); 4135 current = children->at(i)->ToNode(compiler, current);
3851 } 4136 }
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
3904 AddClassNegated(kLineTerminatorRanges, 4189 AddClassNegated(kLineTerminatorRanges,
3905 kLineTerminatorRangeCount, 4190 kLineTerminatorRangeCount,
3906 ranges); 4191 ranges);
3907 break; 4192 break;
3908 // This is not a character range as defined by the spec but a 4193 // This is not a character range as defined by the spec but a
3909 // convenient shorthand for a character class that matches any 4194 // convenient shorthand for a character class that matches any
3910 // character. 4195 // character.
3911 case '*': 4196 case '*':
3912 ranges->Add(CharacterRange::Everything()); 4197 ranges->Add(CharacterRange::Everything());
3913 break; 4198 break;
4199 // This is the set of characters matched by the $ and ^ symbols
4200 // in multiline mode.
4201 case 'n':
4202 AddClass(kLineTerminatorRanges,
4203 kLineTerminatorRangeCount,
4204 ranges);
4205 break;
3914 default: 4206 default:
3915 UNREACHABLE(); 4207 UNREACHABLE();
3916 } 4208 }
3917 } 4209 }
3918 4210
3919 4211
3920 Vector<const uc16> CharacterRange::GetWordBounds() { 4212 Vector<const uc16> CharacterRange::GetWordBounds() {
3921 return Vector<const uc16>(kWordRanges, kWordRangeCount); 4213 return Vector<const uc16>(kWordRanges, kWordRangeCount);
3922 } 4214 }
3923 4215
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
4089 4381
4090 template <class C> 4382 template <class C>
4091 static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) { 4383 static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
4092 NodeInfo full_info(*node->info()); 4384 NodeInfo full_info(*node->info());
4093 full_info.AddFromPreceding(info); 4385 full_info.AddFromPreceding(info);
4094 bool cloned = false; 4386 bool cloned = false;
4095 return RegExpNode::EnsureSibling(node, &full_info, &cloned); 4387 return RegExpNode::EnsureSibling(node, &full_info, &cloned);
4096 } 4388 }
4097 4389
4098 4390
4099 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
4100 NodeInfo full_info(*this->info());
4101 full_info.AddFromPreceding(info);
4102 bool cloned = false;
4103 ActionNode* action = EnsureSibling(this, &full_info, &cloned);
4104 action->set_on_success(action->on_success()->PropagateForward(info));
4105 return action;
4106 }
4107
4108
4109 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
4110 NodeInfo full_info(*this->info());
4111 full_info.AddFromPreceding(info);
4112 bool cloned = false;
4113 ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned);
4114 if (cloned) {
4115 ZoneList<GuardedAlternative>* old_alternatives = alternatives();
4116 int count = old_alternatives->length();
4117 choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
4118 for (int i = 0; i < count; i++) {
4119 GuardedAlternative alternative = old_alternatives->at(i);
4120 alternative.set_node(alternative.node()->PropagateForward(info));
4121 choice->alternatives()->Add(alternative);
4122 }
4123 }
4124 return choice;
4125 }
4126
4127
4128 RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
4129 return PropagateToEndpoint(this, info);
4130 }
4131
4132
4133 RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
4134 NodeInfo full_info(*this->info());
4135 full_info.AddFromPreceding(info);
4136 bool cloned = false;
4137 BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned);
4138 if (cloned) {
4139 // TODO(erikcorry): A back reference has to have two successors (by default
4140 // the same node). The first is used if the back reference matches a non-
4141 // empty back reference, the second if it matches an empty one. This
4142 // doesn't matter for at_end, which is the only one implemented right now,
4143 // but it will matter for other pieces of info.
4144 back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
4145 }
4146 return back_ref;
4147 }
4148
4149
4150 RegExpNode* TextNode::PropagateForward(NodeInfo* info) {
4151 return PropagateToEndpoint(this, info);
4152 }
4153
4154
4155 // ------------------------------------------------------------------- 4391 // -------------------------------------------------------------------
4156 // Splay tree 4392 // Splay tree
4157 4393
4158 4394
4159 OutSet* OutSet::Extend(unsigned value) { 4395 OutSet* OutSet::Extend(unsigned value) {
4160 if (Get(value)) 4396 if (Get(value))
4161 return this; 4397 return this;
4162 if (successors() != NULL) { 4398 if (successors() != NULL) {
4163 for (int i = 0; i < successors()->length(); i++) { 4399 for (int i = 0; i < successors()->length(); i++) {
4164 OutSet* successor = successors()->at(i); 4400 OutSet* successor = successors()->at(i);
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after
4382 EnsureAnalyzed(that->loop_node()); 4618 EnsureAnalyzed(that->loop_node());
4383 info->AddFromFollowing(that->loop_node()->info()); 4619 info->AddFromFollowing(that->loop_node()->info());
4384 } 4620 }
4385 4621
4386 4622
4387 void Analysis::VisitBackReference(BackReferenceNode* that) { 4623 void Analysis::VisitBackReference(BackReferenceNode* that) {
4388 EnsureAnalyzed(that->on_success()); 4624 EnsureAnalyzed(that->on_success());
4389 } 4625 }
4390 4626
4391 4627
4628 void Analysis::VisitAssertion(AssertionNode* that) {
4629 EnsureAnalyzed(that->on_success());
4630 }
4631
4632
4392 // ------------------------------------------------------------------- 4633 // -------------------------------------------------------------------
4393 // Dispatch table construction 4634 // Dispatch table construction
4394 4635
4395 4636
4396 void DispatchTableConstructor::VisitEnd(EndNode* that) { 4637 void DispatchTableConstructor::VisitEnd(EndNode* that) {
4397 AddRange(CharacterRange::Everything()); 4638 AddRange(CharacterRange::Everything());
4398 } 4639 }
4399 4640
4400 4641
4401 void DispatchTableConstructor::BuildTable(ChoiceNode* node) { 4642 void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
4434 } 4675 }
4435 4676
4436 4677
4437 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) { 4678 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
4438 // TODO(160): Find the node that we refer back to and propagate its start 4679 // TODO(160): Find the node that we refer back to and propagate its start
4439 // set back to here. For now we just accept anything. 4680 // set back to here. For now we just accept anything.
4440 AddRange(CharacterRange::Everything()); 4681 AddRange(CharacterRange::Everything());
4441 } 4682 }
4442 4683
4443 4684
4685 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
4686 RegExpNode* target = that->on_success();
4687 target->Accept(this);
4688 }
4689
4690
4444 4691
4445 static int CompareRangeByFrom(const CharacterRange* a, 4692 static int CompareRangeByFrom(const CharacterRange* a,
4446 const CharacterRange* b) { 4693 const CharacterRange* b) {
4447 return Compare<uc16>(a->from(), b->from()); 4694 return Compare<uc16>(a->from(), b->from());
4448 } 4695 }
4449 4696
4450 4697
4451 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { 4698 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
4452 ranges->Sort(CompareRangeByFrom); 4699 ranges->Sort(CompareRangeByFrom);
4453 uc16 last = 0; 4700 uc16 last = 0;
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
4497 RegExpNode* target = that->on_success(); 4744 RegExpNode* target = that->on_success();
4498 target->Accept(this); 4745 target->Accept(this);
4499 } 4746 }
4500 4747
4501 4748
4502 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data, 4749 Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
4503 bool ignore_case, 4750 bool ignore_case,
4504 bool is_multiline, 4751 bool is_multiline,
4505 Handle<String> pattern, 4752 Handle<String> pattern,
4506 bool is_ascii) { 4753 bool is_ascii) {
4754 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
4755 return IrregexpRegExpTooBig(pattern);
4756 }
4507 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); 4757 RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii);
4508 // Wrap the body of the regexp in capture #0. 4758 // Wrap the body of the regexp in capture #0.
4509 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree, 4759 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
4510 0, 4760 0,
4511 &compiler, 4761 &compiler,
4512 compiler.accept()); 4762 compiler.accept());
4513 // Add a .*? at the beginning, outside the body capture. 4763 RegExpNode* node = captured_body;
4514 // Note: We could choose to not add this if the regexp is anchored at 4764 if (!data->tree->IsAnchored()) {
4515 // the start of the input but I'm not sure how best to do that and 4765 // Add a .*? at the beginning, outside the body capture, unless
4516 // since we don't even handle ^ yet I'm saving that optimization for 4766 // this expression is anchored at the beginning.
4517 // later. 4767 node = RegExpQuantifier::ToNode(0,
4518 RegExpNode* node = RegExpQuantifier::ToNode(0, 4768 RegExpTree::kInfinity,
4519 RegExpTree::kInfinity, 4769 false,
4520 false, 4770 new RegExpCharacterClass('*'),
4521 new RegExpCharacterClass('*'), 4771 &compiler,
4522 &compiler, 4772 captured_body);
4523 captured_body); 4773 }
4524 data->node = node; 4774 data->node = node;
4525 Analysis analysis(ignore_case); 4775 Analysis analysis(ignore_case);
4526 analysis.EnsureAnalyzed(node); 4776 analysis.EnsureAnalyzed(node);
4527 4777
4528 NodeInfo info = *node->info(); 4778 NodeInfo info = *node->info();
4529 4779
4530 if (is_multiline && !FLAG_attempt_multiline_irregexp) {
4531 return Handle<FixedArray>::null();
4532 }
4533
4534 if (FLAG_irregexp_native) { 4780 if (FLAG_irregexp_native) {
4535 #ifdef ARM 4781 #ifdef ARM
4536 // Unimplemented, fall-through to bytecode implementation. 4782 // Unimplemented, fall-through to bytecode implementation.
4537 #else // IA32 4783 #else // IA32
4538 RegExpMacroAssemblerIA32::Mode mode; 4784 RegExpMacroAssemblerIA32::Mode mode;
4539 if (is_ascii) { 4785 if (is_ascii) {
4540 mode = RegExpMacroAssemblerIA32::ASCII; 4786 mode = RegExpMacroAssemblerIA32::ASCII;
4541 } else { 4787 } else {
4542 mode = RegExpMacroAssemblerIA32::UC16; 4788 mode = RegExpMacroAssemblerIA32::UC16;
4543 } 4789 }
4544 RegExpMacroAssemblerIA32 macro_assembler(mode, 4790 RegExpMacroAssemblerIA32 macro_assembler(mode,
4545 (data->capture_count + 1) * 2); 4791 (data->capture_count + 1) * 2);
4546 return compiler.Assemble(&macro_assembler, 4792 return compiler.Assemble(&macro_assembler,
4547 node, 4793 node,
4548 data->capture_count, 4794 data->capture_count,
4549 pattern); 4795 pattern);
4550 #endif 4796 #endif
4551 } 4797 }
4552 EmbeddedVector<byte, 1024> codes; 4798 EmbeddedVector<byte, 1024> codes;
4553 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4799 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4554 return compiler.Assemble(&macro_assembler, 4800 return compiler.Assemble(&macro_assembler,
4555 node, 4801 node,
4556 data->capture_count, 4802 data->capture_count,
4557 pattern); 4803 pattern);
4558 } 4804 }
4559 4805
4560 4806
4561 }} // namespace v8::internal 4807 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/macro-assembler-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698