Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(585)

Side by Side Diff: src/jsregexp.cc

Issue 7860035: Merge bleeding edge up to 9192 into the GC branch. (Closed) Base URL: https://v8.googlecode.com/svn/branches/experimental/gc
Patch Set: Created 9 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/jsregexp.h ('k') | src/liveedit.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after
217 Handle<String> subject, 217 Handle<String> subject,
218 int index, 218 int index,
219 Handle<JSArray> last_match_info) { 219 Handle<JSArray> last_match_info) {
220 Isolate* isolate = re->GetIsolate(); 220 Isolate* isolate = re->GetIsolate();
221 221
222 ASSERT(0 <= index); 222 ASSERT(0 <= index);
223 ASSERT(index <= subject->length()); 223 ASSERT(index <= subject->length());
224 224
225 if (!subject->IsFlat()) FlattenString(subject); 225 if (!subject->IsFlat()) FlattenString(subject);
226 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid 226 AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
227 // Extract flattened substrings of cons strings before determining asciiness.
228 227
229 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); 228 String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
230 int needle_len = needle->length(); 229 int needle_len = needle->length();
231 ASSERT(needle->IsFlat()); 230 ASSERT(needle->IsFlat());
232 231
233 if (needle_len != 0) { 232 if (needle_len != 0) {
234 if (index + needle_len > subject->length()) { 233 if (index + needle_len > subject->length()) {
235 return isolate->factory()->null_value(); 234 return isolate->factory()->null_value();
236 } 235 }
237 236
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
340 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii)); 339 Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
341 ASSERT(error_string->IsString()); 340 ASSERT(error_string->IsString());
342 Handle<String> error_message(String::cast(error_string)); 341 Handle<String> error_message(String::cast(error_string));
343 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate); 342 CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
344 return false; 343 return false;
345 } 344 }
346 345
347 JSRegExp::Flags flags = re->GetFlags(); 346 JSRegExp::Flags flags = re->GetFlags();
348 347
349 Handle<String> pattern(re->Pattern()); 348 Handle<String> pattern(re->Pattern());
350 if (!pattern->IsFlat()) { 349 if (!pattern->IsFlat()) FlattenString(pattern);
351 FlattenString(pattern);
352 }
353
354 RegExpCompileData compile_data; 350 RegExpCompileData compile_data;
355 FlatStringReader reader(isolate, pattern); 351 FlatStringReader reader(isolate, pattern);
356 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(), 352 if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
357 &compile_data)) { 353 &compile_data)) {
358 // Throw an exception if we fail to parse the pattern. 354 // Throw an exception if we fail to parse the pattern.
359 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. 355 // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
360 ThrowRegExpException(re, 356 ThrowRegExpException(re,
361 pattern, 357 pattern,
362 compile_data.error, 358 compile_data.error,
363 "malformed_regexp"); 359 "malformed_regexp");
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
427 re->GetIsolate()->factory()->SetRegExpIrregexpData(re, 423 re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
428 JSRegExp::IRREGEXP, 424 JSRegExp::IRREGEXP,
429 pattern, 425 pattern,
430 flags, 426 flags,
431 capture_count); 427 capture_count);
432 } 428 }
433 429
434 430
435 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp, 431 int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
436 Handle<String> subject) { 432 Handle<String> subject) {
437 if (!subject->IsFlat()) { 433 if (!subject->IsFlat()) FlattenString(subject);
438 FlattenString(subject); 434
439 }
440 // Check the asciiness of the underlying storage. 435 // Check the asciiness of the underlying storage.
441 bool is_ascii; 436 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
442 { 437 if (!EnsureCompiledIrregexp(regexp, is_ascii)) return -1;
443 AssertNoAllocation no_gc; 438
444 String* sequential_string = *subject;
445 if (subject->IsConsString()) {
446 sequential_string = ConsString::cast(*subject)->first();
447 }
448 is_ascii = sequential_string->IsAsciiRepresentation();
449 }
450 if (!EnsureCompiledIrregexp(regexp, is_ascii)) {
451 return -1;
452 }
453 #ifdef V8_INTERPRETED_REGEXP 439 #ifdef V8_INTERPRETED_REGEXP
454 // Byte-code regexp needs space allocated for all its registers. 440 // Byte-code regexp needs space allocated for all its registers.
455 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); 441 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data()));
456 #else // V8_INTERPRETED_REGEXP 442 #else // V8_INTERPRETED_REGEXP
457 // Native regexp only needs room to output captures. Registers are handled 443 // Native regexp only needs room to output captures. Registers are handled
458 // internally. 444 // internally.
459 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; 445 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
460 #endif // V8_INTERPRETED_REGEXP 446 #endif // V8_INTERPRETED_REGEXP
461 } 447 }
462 448
463 449
464 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce( 450 RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
465 Handle<JSRegExp> regexp, 451 Handle<JSRegExp> regexp,
466 Handle<String> subject, 452 Handle<String> subject,
467 int index, 453 int index,
468 Vector<int> output) { 454 Vector<int> output) {
469 Isolate* isolate = regexp->GetIsolate(); 455 Isolate* isolate = regexp->GetIsolate();
470 456
471 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate); 457 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
472 458
473 ASSERT(index >= 0); 459 ASSERT(index >= 0);
474 ASSERT(index <= subject->length()); 460 ASSERT(index <= subject->length());
475 ASSERT(subject->IsFlat()); 461 ASSERT(subject->IsFlat());
476 462
477 // A flat ASCII string might have a two-byte first part. 463 bool is_ascii = subject->IsAsciiRepresentationUnderneath();
478 if (subject->IsConsString()) {
479 subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
480 }
481 464
482 #ifndef V8_INTERPRETED_REGEXP 465 #ifndef V8_INTERPRETED_REGEXP
483 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); 466 ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
484 do { 467 do {
485 bool is_ascii = subject->IsAsciiRepresentation();
486 EnsureCompiledIrregexp(regexp, is_ascii); 468 EnsureCompiledIrregexp(regexp, is_ascii);
487 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate); 469 Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
488 NativeRegExpMacroAssembler::Result res = 470 NativeRegExpMacroAssembler::Result res =
489 NativeRegExpMacroAssembler::Match(code, 471 NativeRegExpMacroAssembler::Match(code,
490 subject, 472 subject,
491 output.start(), 473 output.start(),
492 output.length(), 474 output.length(),
493 index, 475 index,
494 isolate); 476 isolate);
495 if (res != NativeRegExpMacroAssembler::RETRY) { 477 if (res != NativeRegExpMacroAssembler::RETRY) {
496 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION || 478 ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
497 isolate->has_pending_exception()); 479 isolate->has_pending_exception());
498 STATIC_ASSERT( 480 STATIC_ASSERT(
499 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS); 481 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
500 STATIC_ASSERT( 482 STATIC_ASSERT(
501 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE); 483 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
502 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) 484 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
503 == RE_EXCEPTION); 485 == RE_EXCEPTION);
504 return static_cast<IrregexpResult>(res); 486 return static_cast<IrregexpResult>(res);
505 } 487 }
506 // If result is RETRY, the string has changed representation, and we 488 // If result is RETRY, the string has changed representation, and we
507 // must restart from scratch. 489 // must restart from scratch.
508 // In this case, it means we must make sure we are prepared to handle 490 // In this case, it means we must make sure we are prepared to handle
509 // the, potentially, different subject (the string can switch between 491 // the, potentially, different subject (the string can switch between
510 // being internal and external, and even between being ASCII and UC16, 492 // being internal and external, and even between being ASCII and UC16,
511 // but the characters are always the same). 493 // but the characters are always the same).
512 IrregexpPrepare(regexp, subject); 494 IrregexpPrepare(regexp, subject);
495 is_ascii = subject->IsAsciiRepresentationUnderneath();
513 } while (true); 496 } while (true);
514 UNREACHABLE(); 497 UNREACHABLE();
515 return RE_EXCEPTION; 498 return RE_EXCEPTION;
516 #else // V8_INTERPRETED_REGEXP 499 #else // V8_INTERPRETED_REGEXP
517 500
518 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); 501 ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
519 bool is_ascii = subject->IsAsciiRepresentation();
520 // We must have done EnsureCompiledIrregexp, so we can get the number of 502 // We must have done EnsureCompiledIrregexp, so we can get the number of
521 // registers. 503 // registers.
522 int* register_vector = output.start(); 504 int* register_vector = output.start();
523 int number_of_capture_registers = 505 int number_of_capture_registers =
524 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; 506 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
525 for (int i = number_of_capture_registers - 1; i >= 0; i--) { 507 for (int i = number_of_capture_registers - 1; i >= 0; i--) {
526 register_vector[i] = -1; 508 register_vector[i] = -1;
527 } 509 }
528 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); 510 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
529 511
(...skipping 2142 matching lines...) Expand 10 before | Expand all | Expand 10 after
2672 } else { 2654 } else {
2673 return elm.cp_offset + elm.data.u_atom->data().length(); 2655 return elm.cp_offset + elm.data.u_atom->data().length();
2674 } 2656 }
2675 } 2657 }
2676 2658
2677 2659
2678 // Finds the fixed match length of a sequence of nodes that goes from 2660 // Finds the fixed match length of a sequence of nodes that goes from
2679 // this alternative and back to this choice node. If there are variable 2661 // this alternative and back to this choice node. If there are variable
2680 // length nodes or other complications in the way then return a sentinel 2662 // length nodes or other complications in the way then return a sentinel
2681 // value indicating that a greedy loop cannot be constructed. 2663 // value indicating that a greedy loop cannot be constructed.
2682 int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) { 2664 int ChoiceNode::GreedyLoopTextLengthForAlternative(
2665 GuardedAlternative* alternative) {
2683 int length = 0; 2666 int length = 0;
2684 RegExpNode* node = alternative->node(); 2667 RegExpNode* node = alternative->node();
2685 // Later we will generate code for all these text nodes using recursion 2668 // Later we will generate code for all these text nodes using recursion
2686 // so we have to limit the max number. 2669 // so we have to limit the max number.
2687 int recursion_depth = 0; 2670 int recursion_depth = 0;
2688 while (node != this) { 2671 while (node != this) {
2689 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { 2672 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2690 return kNodeIsTooComplexForGreedyLoops; 2673 return kNodeIsTooComplexForGreedyLoops;
2691 } 2674 }
2692 int node_length = node->GreedyLoopTextLength(); 2675 int node_length = node->GreedyLoopTextLength();
(...skipping 18 matching lines...) Expand all
2711 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) { 2694 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
2712 ASSERT_EQ(continue_node_, NULL); 2695 ASSERT_EQ(continue_node_, NULL);
2713 AddAlternative(alt); 2696 AddAlternative(alt);
2714 continue_node_ = alt.node(); 2697 continue_node_ = alt.node();
2715 } 2698 }
2716 2699
2717 2700
2718 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2701 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2719 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 2702 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2720 if (trace->stop_node() == this) { 2703 if (trace->stop_node() == this) {
2721 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); 2704 int text_length =
2705 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
2722 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops); 2706 ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
2723 // Update the counter-based backtracking info on the stack. This is an 2707 // Update the counter-based backtracking info on the stack. This is an
2724 // optimization for greedy loops (see below). 2708 // optimization for greedy loops (see below).
2725 ASSERT(trace->cp_offset() == text_length); 2709 ASSERT(trace->cp_offset() == text_length);
2726 macro_assembler->AdvanceCurrentPosition(text_length); 2710 macro_assembler->AdvanceCurrentPosition(text_length);
2727 macro_assembler->GoTo(trace->loop_label()); 2711 macro_assembler->GoTo(trace->loop_label());
2728 return; 2712 return;
2729 } 2713 }
2730 ASSERT(trace->stop_node() == NULL); 2714 ASSERT(trace->stop_node() == NULL);
2731 if (!trace->is_trivial()) { 2715 if (!trace->is_trivial()) {
(...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after
2904 int new_flush_budget = trace->flush_budget() / choice_count; 2888 int new_flush_budget = trace->flush_budget() / choice_count;
2905 if (trace->flush_budget() == 0 && trace->actions() != NULL) { 2889 if (trace->flush_budget() == 0 && trace->actions() != NULL) {
2906 trace->Flush(compiler, this); 2890 trace->Flush(compiler, this);
2907 return; 2891 return;
2908 } 2892 }
2909 2893
2910 RecursionCheck rc(compiler); 2894 RecursionCheck rc(compiler);
2911 2895
2912 Trace* current_trace = trace; 2896 Trace* current_trace = trace;
2913 2897
2914 int text_length = GreedyLoopTextLength(&(alternatives_->at(0))); 2898 int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
2915 bool greedy_loop = false; 2899 bool greedy_loop = false;
2916 Label greedy_loop_label; 2900 Label greedy_loop_label;
2917 Trace counter_backtrack_trace; 2901 Trace counter_backtrack_trace;
2918 counter_backtrack_trace.set_backtrack(&greedy_loop_label); 2902 counter_backtrack_trace.set_backtrack(&greedy_loop_label);
2919 if (not_at_start()) counter_backtrack_trace.set_at_start(false); 2903 if (not_at_start()) counter_backtrack_trace.set_at_start(false);
2920 2904
2921 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { 2905 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
2922 // Here we have special handling for greedy loops containing only text nodes 2906 // Here we have special handling for greedy loops containing only text nodes
2923 // and other simple nodes. These are handled by pushing the current 2907 // and other simple nodes. These are handled by pushing the current
2924 // position on the stack and then incrementing the current position each 2908 // position on the stack and then incrementing the current position each
(...skipping 2424 matching lines...) Expand 10 before | Expand all | Expand 10 after
5349 } 5333 }
5350 5334
5351 return compiler.Assemble(&macro_assembler, 5335 return compiler.Assemble(&macro_assembler,
5352 node, 5336 node,
5353 data->capture_count, 5337 data->capture_count,
5354 pattern); 5338 pattern);
5355 } 5339 }
5356 5340
5357 5341
5358 }} // namespace v8::internal 5342 }} // namespace v8::internal
OLDNEW
« no previous file with comments | « src/jsregexp.h ('k') | src/liveedit.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698