Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(76)

Side by Side Diff: src/jsregexp.cc

Issue 18193: Add support for \b and ^ and $ in multiline mode, completing Irregexp... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 11 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1504 matching lines...) Expand 10 before | Expand all | Expand 10 after
1515 if (backtrack() == NULL) { 1515 if (backtrack() == NULL) {
1516 assembler->Backtrack(); 1516 assembler->Backtrack();
1517 } else { 1517 } else {
1518 assembler->GoTo(backtrack()); 1518 assembler->GoTo(backtrack());
1519 } 1519 }
1520 1520
1521 return true; 1521 return true;
1522 } 1522 }
1523 1523
1524 1524
1525 void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler, Trace* trace) {
1526 if (info()->at_end) {
1527 Label succeed;
1528 // LoadCurrentCharacter will go to the label if we are at the end of the
1529 // input string.
1530 assembler->LoadCurrentCharacter(0, &succeed);
1531 assembler->GoTo(trace->backtrack());
1532 assembler->Bind(&succeed);
1533 }
1534 }
1535
1536
1537 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) { 1525 bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
1538 if (!trace->is_trivial()) { 1526 if (!trace->is_trivial()) {
1539 return trace->Flush(compiler, this); 1527 return trace->Flush(compiler, this);
1540 } 1528 }
1541 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1529 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1542 if (!label()->is_bound()) { 1530 if (!label()->is_bound()) {
1543 assembler->Bind(label()); 1531 assembler->Bind(label());
1544 } 1532 }
1545 EmitInfoChecks(assembler, trace);
1546 assembler->ReadCurrentPositionFromRegister(current_position_register_); 1533 assembler->ReadCurrentPositionFromRegister(current_position_register_);
1547 assembler->ReadStackPointerFromRegister(stack_pointer_register_); 1534 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
1548 // Now that we have unwound the stack we find at the top of the stack the 1535 // Now that we have unwound the stack we find at the top of the stack the
1549 // backtrack that the BeginSubmatch node got. 1536 // backtrack that the BeginSubmatch node got.
1550 assembler->Backtrack(); 1537 assembler->Backtrack();
1551 return true; 1538 return true;
1552 } 1539 }
1553 1540
1554 1541
1555 bool EndNode::Emit(RegExpCompiler* compiler, Trace* trace) { 1542 bool EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1556 if (!trace->is_trivial()) { 1543 if (!trace->is_trivial()) {
1557 return trace->Flush(compiler, this); 1544 return trace->Flush(compiler, this);
1558 } 1545 }
1559 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 1546 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1560 if (!label()->is_bound()) { 1547 if (!label()->is_bound()) {
1561 assembler->Bind(label()); 1548 assembler->Bind(label());
1562 } 1549 }
1563 switch (action_) { 1550 switch (action_) {
1564 case ACCEPT: 1551 case ACCEPT:
1565 EmitInfoChecks(assembler, trace);
1566 assembler->Succeed(); 1552 assembler->Succeed();
1567 return true; 1553 return true;
1568 case BACKTRACK: 1554 case BACKTRACK:
1569 ASSERT(!info()->at_end);
1570 assembler->GoTo(trace->backtrack()); 1555 assembler->GoTo(trace->backtrack());
1571 return true; 1556 return true;
1572 case NEGATIVE_SUBMATCH_SUCCESS: 1557 case NEGATIVE_SUBMATCH_SUCCESS:
1573 // This case is handled in a different virtual method. 1558 // This case is handled in a different virtual method.
1574 UNREACHABLE(); 1559 UNREACHABLE();
1575 } 1560 }
1576 UNIMPLEMENTED(); 1561 UNIMPLEMENTED();
1577 return false; 1562 return false;
1578 } 1563 }
1579 1564
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after
1928 macro_assembler->Bind(&success); 1913 macro_assembler->Bind(&success);
1929 } 1914 }
1930 1915
1931 1916
1932 RegExpNode::~RegExpNode() { 1917 RegExpNode::~RegExpNode() {
1933 } 1918 }
1934 1919
1935 1920
1936 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, 1921 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
1937 Trace* trace) { 1922 Trace* trace) {
1938 // TODO(erikcorry): Implement support.
1939 if (info_.follows_word_interest ||
1940 info_.follows_newline_interest ||
1941 info_.follows_start_interest) {
1942 return FAIL;
1943 }
1944
1945 // If we are generating a greedy loop then don't stop and don't reuse code. 1923 // If we are generating a greedy loop then don't stop and don't reuse code.
1946 if (trace->stop_node() != NULL) { 1924 if (trace->stop_node() != NULL) {
1947 return CONTINUE; 1925 return CONTINUE;
1948 } 1926 }
1949 1927
1950 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); 1928 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1951 if (trace->is_trivial()) { 1929 if (trace->is_trivial()) {
1952 if (label_.is_bound()) { 1930 if (label_.is_bound()) {
1953 // We are being asked to generate a generic version, but that's already 1931 // We are being asked to generate a generic version, but that's already
1954 // been done so just go to it. 1932 // been done so just go to it.
(...skipping 28 matching lines...) Expand all
1983 } 1961 }
1984 1962
1985 1963
1986 int ActionNode::EatsAtLeast(int recursion_depth) { 1964 int ActionNode::EatsAtLeast(int recursion_depth) {
1987 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; 1965 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
1988 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! 1966 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
1989 return on_success()->EatsAtLeast(recursion_depth + 1); 1967 return on_success()->EatsAtLeast(recursion_depth + 1);
1990 } 1968 }
1991 1969
1992 1970
1971 int AssertionNode::EatsAtLeast(int recursion_depth) {
1972 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
1973 return on_success()->EatsAtLeast(recursion_depth + 1);
1974 }
1975
1976
1977 int BackReferenceNode::EatsAtLeast(int recursion_depth) {
1978 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
1979 return on_success()->EatsAtLeast(recursion_depth + 1);
1980 }
1981
1982
1983
1993 int TextNode::EatsAtLeast(int recursion_depth) { 1984 int TextNode::EatsAtLeast(int recursion_depth) {
1994 int answer = Length(); 1985 int answer = Length();
1995 if (answer >= 4) return answer; 1986 if (answer >= 4) return answer;
1996 if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer; 1987 if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
1997 return answer + on_success()->EatsAtLeast(recursion_depth + 1); 1988 return answer + on_success()->EatsAtLeast(recursion_depth + 1);
1998 } 1989 }
1999 1990
2000 1991
2001 int ChoiceNode::EatsAtLeastHelper(int recursion_depth, 1992 int ChoiceNode::EatsAtLeastHelper(int recursion_depth,
2002 RegExpNode* ignore_this_node) { 1993 RegExpNode* ignore_this_node) {
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after
2250 for (int i = 0; i < characters_; i++) { 2241 for (int i = 0; i < characters_; i++) {
2251 positions_[i].mask = 0; 2242 positions_[i].mask = 0;
2252 positions_[i].value = 0; 2243 positions_[i].value = 0;
2253 positions_[i].determines_perfectly = false; 2244 positions_[i].determines_perfectly = false;
2254 } 2245 }
2255 characters_ = 0; 2246 characters_ = 0;
2256 } 2247 }
2257 2248
2258 2249
2259 void QuickCheckDetails::Advance(int by, bool ascii) { 2250 void QuickCheckDetails::Advance(int by, bool ascii) {
2260 ASSERT(by > 0); 2251 ASSERT(by >= 0);
2261 if (by >= characters_) { 2252 if (by >= characters_) {
2262 Clear(); 2253 Clear();
2263 return; 2254 return;
2264 } 2255 }
2265 for (int i = 0; i < characters_ - by; i++) { 2256 for (int i = 0; i < characters_ - by; i++) {
2266 positions_[i] = positions_[by + i]; 2257 positions_[i] = positions_[by + i];
2267 } 2258 }
2268 for (int i = characters_ - by; i < characters_; i++) { 2259 for (int i = characters_ - by; i < characters_; i++) {
2269 positions_[i].mask = 0; 2260 positions_[i].mask = 0;
2270 positions_[i].value = 0; 2261 positions_[i].value = 0;
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
2335 for (int i = 1; i < choice_count; i++) { 2326 for (int i = 1; i < choice_count; i++) {
2336 QuickCheckDetails new_details(details->characters()); 2327 QuickCheckDetails new_details(details->characters());
2337 RegExpNode* node = alternatives_->at(i).node(); 2328 RegExpNode* node = alternatives_->at(i).node();
2338 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in); 2329 node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in);
2339 // Here we merge the quick match details of the two branches. 2330 // Here we merge the quick match details of the two branches.
2340 details->Merge(&new_details, characters_filled_in); 2331 details->Merge(&new_details, characters_filled_in);
2341 } 2332 }
2342 } 2333 }
2343 2334
2344 2335
2336 // Check for [0-9A-Z_a-z].
2337 static void EmitWordCheck(RegExpMacroAssembler* assembler,
2338 Label* word,
2339 Label* non_word,
2340 bool fall_through_on_word) {
2341 assembler->CheckCharacterGT('z', non_word);
2342 assembler->CheckCharacterLT('0', non_word);
2343 assembler->CheckCharacterGT('a' - 1, word);
2344 assembler->CheckCharacterLT('9' + 1, word);
2345 assembler->CheckCharacterLT('A', non_word);
2346 assembler->CheckCharacterLT('Z' + 1, word);
2347 if (fall_through_on_word) {
2348 assembler->CheckNotCharacter('_', non_word);
2349 } else {
2350 assembler->CheckCharacter('_', word);
2351 }
2352 }
2353
2354
2355 bool AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2356 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2357 switch (type_) {
2358 case AT_END: {
2359 Label ok;
2360 assembler->LoadCurrentCharacter(trace->cp_offset(), &ok);
2361 assembler->GoTo(trace->backtrack());
2362 assembler->Bind(&ok);
2363 break;
2364 }
2365 case AT_START:
2366 assembler->CheckNotAtStart(trace->backtrack());
2367 break;
2368 case AFTER_NEWLINE: {
2369 // Check whether we are either at the start or immediately after a
2370 // newline.
2371 Trace new_trace(*trace);
2372 // Invalidate the current character register.
2373 new_trace.AdvanceCurrentPositionInTrace(0, compiler->ascii());
2374
2375 Label ok;
2376 if (new_trace.cp_offset() == 0) {
2377 assembler->CheckAtStart(&ok);
2378 }
2379 assembler->LoadCurrentCharacter(-1, new_trace.backtrack(), false);
Christian Plesner Hansen 2009/01/19 10:28:59 Maybe a comment about why this works when you're a
2380 // Newline means \n, \r, 0x2028 or 0x2029.
2381 if (!compiler->ascii()) {
2382 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
2383 }
2384 assembler->CheckCharacter('\n', &ok);
2385 assembler->CheckNotCharacter('\r', new_trace.backtrack());
2386 assembler->Bind(&ok);
2387 return on_success()->Emit(compiler, &new_trace);
2388 }
2389 case AT_NON_BOUNDARY:
2390 case AT_BOUNDARY: {
2391 Label before_non_word;
Christian Plesner Hansen 2009/01/19 10:28:59 Maybe you could split these huge cases out into se
2392 Label before_word;
2393 if (trace->characters_preloaded() != 1) {
2394 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
2395 }
2396 // Fall through on non-word.
2397 EmitWordCheck(assembler, &before_word, &before_non_word, false);
2398
2399 // Invalidate the current character register.
2400 Trace new_trace(*trace);
2401 new_trace.AdvanceCurrentPositionInTrace(0, compiler->ascii());
2402
2403 Label ok;
2404 Label* boundary;
2405 Label* not_boundary;
2406 if (type_ == AT_BOUNDARY) {
2407 boundary = &ok;
2408 not_boundary = new_trace.backtrack();
2409 } else {
2410 not_boundary = &ok;
2411 boundary = new_trace.backtrack();
2412 }
2413
2414 // Next character is not a word character.
2415 assembler->Bind(&before_non_word);
2416 if (new_trace.cp_offset() == 0) {
2417 assembler->CheckAtStart(not_boundary);
2418 }
2419 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
Christian Plesner Hansen 2009/01/19 10:28:59 What happens here if we're right at the beginning
2420 &ok, // Unused dummy label in this call.
2421 false);
2422 // Fall through on non-word.
2423 EmitWordCheck(assembler, boundary, not_boundary, false);
2424 assembler->GoTo(not_boundary);
2425
2426 // Next character is a word character.
2427 assembler->Bind(&before_word);
2428 if (new_trace.cp_offset() == 0) {
2429 assembler->CheckAtStart(boundary);
2430 }
2431 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
2432 &ok, // Unused dummy label in this call.
2433 false);
2434 bool fall_through_on_word = (type_ == AT_NON_BOUNDARY);
2435 EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word);
2436
2437 assembler->Bind(&ok);
2438
2439 return on_success()->Emit(compiler, &new_trace);
2440 }
2441 }
2442 return on_success()->Emit(compiler, trace);
2443 }
2444
2445
2345 // We call this repeatedly to generate code for each pass over the text node. 2446 // We call this repeatedly to generate code for each pass over the text node.
2346 // The passes are in increasing order of difficulty because we hope one 2447 // The passes are in increasing order of difficulty because we hope one
2347 // of the first passes will fail in which case we are saved the work of the 2448 // of the first passes will fail in which case we are saved the work of the
2348 // later passes. for example for the case independent regexp /%[asdfghjkl]a/ 2449 // later passes. for example for the case independent regexp /%[asdfghjkl]a/
2349 // we will check the '%' in the first pass, the case independent 'a' in the 2450 // we will check the '%' in the first pass, the case independent 'a' in the
2350 // second pass and the character class in the last pass. 2451 // second pass and the character class in the last pass.
2351 // 2452 //
2352 // The passes are done from right to left, so for example to test for /bar/ 2453 // The passes are done from right to left, so for example to test for /bar/
2353 // we will first test for an 'r' with offset 2, then an 'a' with offset 1 2454 // we will first test for an 'r' with offset 2, then an 'a' with offset 1
2354 // and then a 'b' with offset 0. This means we can avoid the end-of-input 2455 // and then a 'b' with offset 0. This means we can avoid the end-of-input
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
2480 // way) and character classes. For efficiency we do not do this in a single 2581 // way) and character classes. For efficiency we do not do this in a single
2481 // pass from left to right. Instead we pass over the text node several times, 2582 // pass from left to right. Instead we pass over the text node several times,
2482 // emitting code for some character positions every time. See the comment on 2583 // emitting code for some character positions every time. See the comment on
2483 // TextEmitPass for details. 2584 // TextEmitPass for details.
2484 bool TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { 2585 bool TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
2485 LimitResult limit_result = LimitVersions(compiler, trace); 2586 LimitResult limit_result = LimitVersions(compiler, trace);
2486 if (limit_result == FAIL) return false; 2587 if (limit_result == FAIL) return false;
2487 if (limit_result == DONE) return true; 2588 if (limit_result == DONE) return true;
2488 ASSERT(limit_result == CONTINUE); 2589 ASSERT(limit_result == CONTINUE);
2489 2590
2490 if (info()->follows_word_interest ||
2491 info()->follows_newline_interest ||
2492 info()->follows_start_interest) {
2493 return false;
2494 }
2495
2496 if (info()->at_end) {
2497 compiler->macro_assembler()->GoTo(trace->backtrack());
2498 return true;
2499 }
2500
2501 if (compiler->ascii()) { 2591 if (compiler->ascii()) {
2502 int dummy = 0; 2592 int dummy = 0;
2503 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy); 2593 TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
2504 } 2594 }
2505 2595
2506 bool first_elt_done = false; 2596 bool first_elt_done = false;
2507 int bound_checked_to = trace->cp_offset() - 1; 2597 int bound_checked_to = trace->cp_offset() - 1;
2508 bound_checked_to += trace->bound_checked_up_to(); 2598 bound_checked_to += trace->bound_checked_up_to();
2509 2599
2510 // If a character is preloaded into the current character register then 2600 // If a character is preloaded into the current character register then
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
2555 &bound_checked_to); 2645 &bound_checked_to);
2556 2646
2557 Trace successor_trace(*trace); 2647 Trace successor_trace(*trace);
2558 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler->ascii()); 2648 successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler->ascii());
2559 RecursionCheck rc(compiler); 2649 RecursionCheck rc(compiler);
2560 return on_success()->Emit(compiler, &successor_trace); 2650 return on_success()->Emit(compiler, &successor_trace);
2561 } 2651 }
2562 2652
2563 2653
2564 void Trace::AdvanceCurrentPositionInTrace(int by, bool ascii) { 2654 void Trace::AdvanceCurrentPositionInTrace(int by, bool ascii) {
2565 ASSERT(by > 0); 2655 // We can call this with by == 0. In that case it just means that the
Christian Plesner Hansen 2009/01/19 10:28:59 Maybe split invalidation out into a separate metho
2656 // current position register has been invalidated.
2657
2566 // We don't have an instruction for shifting the current character register 2658 // We don't have an instruction for shifting the current character register
2567 // down or for using a shifted value for anything so lets just forget that 2659 // down or for using a shifted value for anything so lets just forget that
2568 // we preloaded any characters into it. 2660 // we preloaded any characters into it.
2569 characters_preloaded_ = 0; 2661 characters_preloaded_ = 0;
2570 // Adjust the offsets of the quick check performed information. This 2662 // Adjust the offsets of the quick check performed information. This
2571 // information is used to find out what we already determined about the 2663 // information is used to find out what we already determined about the
2572 // characters by means of mask and compare. 2664 // characters by means of mask and compare.
2573 quick_check_performed_.Advance(by, ascii); 2665 quick_check_performed_.Advance(by, ascii);
2574 cp_offset_ += by; 2666 cp_offset_ += by;
2575 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by); 2667 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
2609 int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) { 2701 int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) {
2610 int length = 0; 2702 int length = 0;
2611 RegExpNode* node = alternative->node(); 2703 RegExpNode* node = alternative->node();
2612 // Later we will generate code for all these text nodes using recursion 2704 // Later we will generate code for all these text nodes using recursion
2613 // so we have to limit the max number. 2705 // so we have to limit the max number.
2614 int recursion_depth = 0; 2706 int recursion_depth = 0;
2615 while (node != this) { 2707 while (node != this) {
2616 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) { 2708 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
2617 return kNodeIsTooComplexForGreedyLoops; 2709 return kNodeIsTooComplexForGreedyLoops;
2618 } 2710 }
2619 NodeInfo* info = node->info();
2620 if (info->follows_word_interest ||
2621 info->follows_newline_interest ||
2622 info->follows_start_interest) {
2623 return kNodeIsTooComplexForGreedyLoops;
2624 }
2625 int node_length = node->GreedyLoopTextLength(); 2711 int node_length = node->GreedyLoopTextLength();
2626 if (node_length == kNodeIsTooComplexForGreedyLoops) { 2712 if (node_length == kNodeIsTooComplexForGreedyLoops) {
2627 return kNodeIsTooComplexForGreedyLoops; 2713 return kNodeIsTooComplexForGreedyLoops;
2628 } 2714 }
2629 length += node_length; 2715 length += node_length;
2630 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); 2716 SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
2631 node = seq_node->on_success(); 2717 node = seq_node->on_success();
2632 } 2718 }
2633 return length; 2719 return length;
2634 } 2720 }
(...skipping 454 matching lines...) Expand 10 before | Expand all | Expand 10 after
3089 } 3175 }
3090 // If the match is empty we bail out, otherwise we fall through 3176 // If the match is empty we bail out, otherwise we fall through
3091 // to the on-success continuation. 3177 // to the on-success continuation.
3092 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register, 3178 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
3093 trace->backtrack()); 3179 trace->backtrack());
3094 assembler->Bind(&skip_empty_check); 3180 assembler->Bind(&skip_empty_check);
3095 return on_success()->Emit(compiler, trace); 3181 return on_success()->Emit(compiler, trace);
3096 } 3182 }
3097 case POSITIVE_SUBMATCH_SUCCESS: 3183 case POSITIVE_SUBMATCH_SUCCESS:
3098 if (!trace->is_trivial()) return trace->Flush(compiler, this); 3184 if (!trace->is_trivial()) return trace->Flush(compiler, this);
3099 // TODO(erikcorry): Implement support.
3100 if (info()->follows_word_interest ||
3101 info()->follows_newline_interest ||
3102 info()->follows_start_interest) {
3103 return false;
3104 }
3105 if (info()->at_end) {
3106 Label at_end;
3107 // Load current character jumps to the label if we are beyond the string
3108 // end.
3109 assembler->LoadCurrentCharacter(0, &at_end);
3110 assembler->GoTo(trace->backtrack());
3111 assembler->Bind(&at_end);
3112 }
3113 assembler->ReadCurrentPositionFromRegister( 3185 assembler->ReadCurrentPositionFromRegister(
3114 data_.u_submatch.current_position_register); 3186 data_.u_submatch.current_position_register);
3115 assembler->ReadStackPointerFromRegister( 3187 assembler->ReadStackPointerFromRegister(
3116 data_.u_submatch.stack_pointer_register); 3188 data_.u_submatch.stack_pointer_register);
3117 return on_success()->Emit(compiler, trace); 3189 return on_success()->Emit(compiler, trace);
3118 default: 3190 default:
3119 UNREACHABLE(); 3191 UNREACHABLE();
3120 return false; 3192 return false;
3121 } 3193 }
3122 } 3194 }
3123 3195
3124 3196
3125 bool BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { 3197 bool BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3126 RegExpMacroAssembler* assembler = compiler->macro_assembler(); 3198 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3127 if (!trace->is_trivial()) { 3199 if (!trace->is_trivial()) {
3128 return trace->Flush(compiler, this); 3200 return trace->Flush(compiler, this);
3129 } 3201 }
3130 3202
3131 LimitResult limit_result = LimitVersions(compiler, trace); 3203 LimitResult limit_result = LimitVersions(compiler, trace);
3132 if (limit_result == DONE) return true; 3204 if (limit_result == DONE) return true;
3133 if (limit_result == FAIL) return false; 3205 if (limit_result == FAIL) return false;
3134 ASSERT(limit_result == CONTINUE); 3206 ASSERT(limit_result == CONTINUE);
3135 3207
3136 RecursionCheck rc(compiler); 3208 RecursionCheck rc(compiler);
3137 3209
3138 ASSERT_EQ(start_reg_ + 1, end_reg_); 3210 ASSERT_EQ(start_reg_ + 1, end_reg_);
3139 if (info()->at_end) { 3211 if (compiler->ignore_case()) {
3140 // If we are constrained to match at the end of the input then succeed 3212 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
3141 // iff the back reference is empty. 3213 trace->backtrack());
3142 assembler->CheckNotRegistersEqual(start_reg_,
3143 end_reg_,
3144 trace->backtrack());
3145 } else { 3214 } else {
3146 if (compiler->ignore_case()) { 3215 assembler->CheckNotBackReference(start_reg_, trace->backtrack());
3147 assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
3148 trace->backtrack());
3149 } else {
3150 assembler->CheckNotBackReference(start_reg_, trace->backtrack());
3151 }
3152 } 3216 }
3153 return on_success()->Emit(compiler, trace); 3217 return on_success()->Emit(compiler, trace);
3154 } 3218 }
3155 3219
3156 3220
3157 // ------------------------------------------------------------------- 3221 // -------------------------------------------------------------------
3158 // Dot/dotty output 3222 // Dot/dotty output
3159 3223
3160 3224
3161 #ifdef DEBUG 3225 #ifdef DEBUG
(...skipping 220 matching lines...) Expand 10 before | Expand all | Expand 10 after
3382 Visit(that->on_success()); 3446 Visit(that->on_success());
3383 } 3447 }
3384 3448
3385 3449
3386 void DotPrinter::VisitEnd(EndNode* that) { 3450 void DotPrinter::VisitEnd(EndNode* that) {
3387 stream()->Add(" n%p [style=bold, shape=point];\n", that); 3451 stream()->Add(" n%p [style=bold, shape=point];\n", that);
3388 PrintAttributes(that); 3452 PrintAttributes(that);
3389 } 3453 }
3390 3454
3391 3455
3456 void DotPrinter::VisitAssertion(AssertionNode* that) {
3457 stream()->Add(" n%p [", that);
3458 switch (that->type()) {
3459 case AssertionNode::AT_END:
3460 stream()->Add("label=\"$\", shape=septagon");
3461 break;
3462 case AssertionNode::AT_START:
3463 stream()->Add("label=\"^\", shape=septagon");
3464 break;
3465 case AssertionNode::AT_BOUNDARY:
3466 stream()->Add("label=\"\\b\", shape=septagon");
3467 break;
3468 case AssertionNode::AT_NON_BOUNDARY:
3469 stream()->Add("label=\"\\B\", shape=septagon");
3470 break;
3471 case AssertionNode::AFTER_NEWLINE:
3472 stream()->Add("label=\"(?<=\\n)\", shape=septagon");
3473 break;
3474 }
3475 stream()->Add("];\n");
3476 PrintAttributes(that);
3477 RegExpNode* successor = that->on_success();
3478 stream()->Add(" n%p -> n%p;\n", that, successor);
3479 Visit(successor);
3480 }
3481
3482
3392 void DotPrinter::VisitAction(ActionNode* that) { 3483 void DotPrinter::VisitAction(ActionNode* that) {
3393 stream()->Add(" n%p [", that); 3484 stream()->Add(" n%p [", that);
3394 switch (that->type_) { 3485 switch (that->type_) {
3395 case ActionNode::SET_REGISTER: 3486 case ActionNode::SET_REGISTER:
3396 stream()->Add("label=\"$%i:=%i\", shape=octagon", 3487 stream()->Add("label=\"$%i:=%i\", shape=octagon",
3397 that->data_.u_store_register.reg, 3488 that->data_.u_store_register.reg,
3398 that->data_.u_store_register.value); 3489 that->data_.u_store_register.value);
3399 break; 3490 break;
3400 case ActionNode::INCREMENT_REGISTER: 3491 case ActionNode::INCREMENT_REGISTER:
3401 stream()->Add("label=\"$%i++\", shape=octagon", 3492 stream()->Add("label=\"$%i++\", shape=octagon",
(...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after
3742 return center; 3833 return center;
3743 } 3834 }
3744 } 3835 }
3745 3836
3746 3837
3747 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, 3838 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
3748 RegExpNode* on_success) { 3839 RegExpNode* on_success) {
3749 NodeInfo info; 3840 NodeInfo info;
3750 switch (type()) { 3841 switch (type()) {
3751 case START_OF_LINE: 3842 case START_OF_LINE:
3752 info.follows_newline_interest = true; 3843 return AssertionNode::AfterNewline(on_success);
3753 break;
3754 case START_OF_INPUT: 3844 case START_OF_INPUT:
3755 info.follows_start_interest = true; 3845 return AssertionNode::AtStart(on_success);
3756 break; 3846 case BOUNDARY:
3757 case BOUNDARY: case NON_BOUNDARY: 3847 return AssertionNode::AtBoundary(on_success);
3758 info.follows_word_interest = true; 3848 case NON_BOUNDARY:
3759 break; 3849 return AssertionNode::AtNonBoundary(on_success);
3760 case END_OF_INPUT: 3850 case END_OF_INPUT:
3761 info.at_end = true; 3851 return AssertionNode::AtEnd(on_success);
3762 break;
3763 case END_OF_LINE: 3852 case END_OF_LINE:
3764 // This is wrong but has the effect of making the compiler abort. 3853 // Compile $ in multiline regexps as an alternation with a positive
3765 info.at_end = true; 3854 // lookahead in one side and an end-of-input on the other side.
3855 // We need two registers for the lookahead.
3856 int stack_pointer_register = compiler->AllocateRegister();
3857 int position_register = compiler->AllocateRegister();
3858 // The ChoiceNode to distinguish between a newline and end-of-input.
3859 ChoiceNode* result = new ChoiceNode(2);
3860 // Create a newline atom.
3861 ZoneList<CharacterRange>* newline_ranges =
3862 new ZoneList<CharacterRange>(3);
3863 CharacterRange::AddClassEscape('n', newline_ranges);
3864 RegExpCharacterClass* newline_atom =
3865 new RegExpCharacterClass(newline_ranges, false);
Christian Plesner Hansen 2009/01/19 10:28:59 You should be able to just use "new RegExpCharacte
3866 ZoneList<TextElement>* elms = new ZoneList<TextElement>(1);
3867 elms->Add(TextElement::CharClass(newline_atom));
3868 TextNode* newline_matcher = new TextNode(
Christian Plesner Hansen 2009/01/19 10:28:59 You should be able to use "new TextNode(newline_at
3869 elms,
3870 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
3871 position_register,
3872 on_success));
3873 // Create an end-of-input matcher.
3874 RegExpNode* end_of_line = ActionNode::BeginSubmatch(
3875 stack_pointer_register,
3876 position_register,
3877 newline_matcher);
3878 // Add the two alternatives to the ChoiceNode.
3879 GuardedAlternative eol_alternative(end_of_line);
3880 result->AddAlternative(eol_alternative);
3881 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
3882 result->AddAlternative(end_alternative);
3883 return result;
3766 } 3884 }
3767 return on_success->PropagateForward(&info); 3885 UNREACHABLE();
Christian Plesner Hansen 2009/01/19 10:28:59 Maybe have this in the 'default' case, that way th
3886 return on_success;
3768 } 3887 }
3769 3888
3770 3889
3771 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler, 3890 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
3772 RegExpNode* on_success) { 3891 RegExpNode* on_success) {
3773 return new BackReferenceNode(RegExpCapture::StartRegister(index()), 3892 return new BackReferenceNode(RegExpCapture::StartRegister(index()),
3774 RegExpCapture::EndRegister(index()), 3893 RegExpCapture::EndRegister(index()),
3775 on_success); 3894 on_success);
3776 } 3895 }
3777 3896
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
3904 AddClassNegated(kLineTerminatorRanges, 4023 AddClassNegated(kLineTerminatorRanges,
3905 kLineTerminatorRangeCount, 4024 kLineTerminatorRangeCount,
3906 ranges); 4025 ranges);
3907 break; 4026 break;
3908 // This is not a character range as defined by the spec but a 4027 // This is not a character range as defined by the spec but a
3909 // convenient shorthand for a character class that matches any 4028 // convenient shorthand for a character class that matches any
3910 // character. 4029 // character.
3911 case '*': 4030 case '*':
3912 ranges->Add(CharacterRange::Everything()); 4031 ranges->Add(CharacterRange::Everything());
3913 break; 4032 break;
4033 // This is the set of characters matched by the $ and ^ symbols
4034 // in multiline mode.
4035 case 'n':
4036 AddClass(kLineTerminatorRanges,
4037 kLineTerminatorRangeCount,
4038 ranges);
4039 break;
3914 default: 4040 default:
3915 UNREACHABLE(); 4041 UNREACHABLE();
3916 } 4042 }
3917 } 4043 }
3918 4044
3919 4045
3920 Vector<const uc16> CharacterRange::GetWordBounds() { 4046 Vector<const uc16> CharacterRange::GetWordBounds() {
3921 return Vector<const uc16>(kWordRanges, kWordRangeCount); 4047 return Vector<const uc16>(kWordRanges, kWordRangeCount);
3922 } 4048 }
3923 4049
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
4089 4215
4090 template <class C> 4216 template <class C>
4091 static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) { 4217 static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
4092 NodeInfo full_info(*node->info()); 4218 NodeInfo full_info(*node->info());
4093 full_info.AddFromPreceding(info); 4219 full_info.AddFromPreceding(info);
4094 bool cloned = false; 4220 bool cloned = false;
4095 return RegExpNode::EnsureSibling(node, &full_info, &cloned); 4221 return RegExpNode::EnsureSibling(node, &full_info, &cloned);
4096 } 4222 }
4097 4223
4098 4224
4099 RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
Christian Plesner Hansen 2009/01/19 10:28:59 O joy, we can get rid of all this cra..., I mean s
4100 NodeInfo full_info(*this->info());
4101 full_info.AddFromPreceding(info);
4102 bool cloned = false;
4103 ActionNode* action = EnsureSibling(this, &full_info, &cloned);
4104 action->set_on_success(action->on_success()->PropagateForward(info));
4105 return action;
4106 }
4107
4108
4109 RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
4110 NodeInfo full_info(*this->info());
4111 full_info.AddFromPreceding(info);
4112 bool cloned = false;
4113 ChoiceNode* choice = EnsureSibling(this, &full_info, &cloned);
4114 if (cloned) {
4115 ZoneList<GuardedAlternative>* old_alternatives = alternatives();
4116 int count = old_alternatives->length();
4117 choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
4118 for (int i = 0; i < count; i++) {
4119 GuardedAlternative alternative = old_alternatives->at(i);
4120 alternative.set_node(alternative.node()->PropagateForward(info));
4121 choice->alternatives()->Add(alternative);
4122 }
4123 }
4124 return choice;
4125 }
4126
4127
4128 RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
4129 return PropagateToEndpoint(this, info);
4130 }
4131
4132
4133 RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
4134 NodeInfo full_info(*this->info());
4135 full_info.AddFromPreceding(info);
4136 bool cloned = false;
4137 BackReferenceNode* back_ref = EnsureSibling(this, &full_info, &cloned);
4138 if (cloned) {
4139 // TODO(erikcorry): A back reference has to have two successors (by default
4140 // the same node). The first is used if the back reference matches a non-
4141 // empty back reference, the second if it matches an empty one. This
4142 // doesn't matter for at_end, which is the only one implemented right now,
4143 // but it will matter for other pieces of info.
4144 back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
4145 }
4146 return back_ref;
4147 }
4148
4149
4150 RegExpNode* TextNode::PropagateForward(NodeInfo* info) {
4151 return PropagateToEndpoint(this, info);
4152 }
4153
4154
4155 // ------------------------------------------------------------------- 4225 // -------------------------------------------------------------------
4156 // Splay tree 4226 // Splay tree
4157 4227
4158 4228
4159 OutSet* OutSet::Extend(unsigned value) { 4229 OutSet* OutSet::Extend(unsigned value) {
4160 if (Get(value)) 4230 if (Get(value))
4161 return this; 4231 return this;
4162 if (successors() != NULL) { 4232 if (successors() != NULL) {
4163 for (int i = 0; i < successors()->length(); i++) { 4233 for (int i = 0; i < successors()->length(); i++) {
4164 OutSet* successor = successors()->at(i); 4234 OutSet* successor = successors()->at(i);
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after
4382 EnsureAnalyzed(that->loop_node()); 4452 EnsureAnalyzed(that->loop_node());
4383 info->AddFromFollowing(that->loop_node()->info()); 4453 info->AddFromFollowing(that->loop_node()->info());
4384 } 4454 }
4385 4455
4386 4456
4387 void Analysis::VisitBackReference(BackReferenceNode* that) { 4457 void Analysis::VisitBackReference(BackReferenceNode* that) {
4388 EnsureAnalyzed(that->on_success()); 4458 EnsureAnalyzed(that->on_success());
4389 } 4459 }
4390 4460
4391 4461
4462 void Analysis::VisitAssertion(AssertionNode* that) {
4463 EnsureAnalyzed(that->on_success());
4464 }
4465
4466
4392 // ------------------------------------------------------------------- 4467 // -------------------------------------------------------------------
4393 // Dispatch table construction 4468 // Dispatch table construction
4394 4469
4395 4470
4396 void DispatchTableConstructor::VisitEnd(EndNode* that) { 4471 void DispatchTableConstructor::VisitEnd(EndNode* that) {
4397 AddRange(CharacterRange::Everything()); 4472 AddRange(CharacterRange::Everything());
4398 } 4473 }
4399 4474
4400 4475
4401 void DispatchTableConstructor::BuildTable(ChoiceNode* node) { 4476 void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
4434 } 4509 }
4435 4510
4436 4511
4437 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) { 4512 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
4438 // TODO(160): Find the node that we refer back to and propagate its start 4513 // TODO(160): Find the node that we refer back to and propagate its start
4439 // set back to here. For now we just accept anything. 4514 // set back to here. For now we just accept anything.
4440 AddRange(CharacterRange::Everything()); 4515 AddRange(CharacterRange::Everything());
4441 } 4516 }
4442 4517
4443 4518
4519 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
4520 RegExpNode* target = that->on_success();
4521 target->Accept(this);
4522 }
4523
4524
4444 4525
4445 static int CompareRangeByFrom(const CharacterRange* a, 4526 static int CompareRangeByFrom(const CharacterRange* a,
4446 const CharacterRange* b) { 4527 const CharacterRange* b) {
4447 return Compare<uc16>(a->from(), b->from()); 4528 return Compare<uc16>(a->from(), b->from());
4448 } 4529 }
4449 4530
4450 4531
4451 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { 4532 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
4452 ranges->Sort(CompareRangeByFrom); 4533 ranges->Sort(CompareRangeByFrom);
4453 uc16 last = 0; 4534 uc16 last = 0;
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
4520 false, 4601 false,
4521 new RegExpCharacterClass('*'), 4602 new RegExpCharacterClass('*'),
4522 &compiler, 4603 &compiler,
4523 captured_body); 4604 captured_body);
4524 data->node = node; 4605 data->node = node;
4525 Analysis analysis(ignore_case); 4606 Analysis analysis(ignore_case);
4526 analysis.EnsureAnalyzed(node); 4607 analysis.EnsureAnalyzed(node);
4527 4608
4528 NodeInfo info = *node->info(); 4609 NodeInfo info = *node->info();
4529 4610
4530 if (is_multiline && !FLAG_attempt_multiline_irregexp) {
4531 return Handle<FixedArray>::null();
4532 }
4533
4534 if (FLAG_irregexp_native) { 4611 if (FLAG_irregexp_native) {
4535 #ifdef ARM 4612 #ifdef ARM
4536 // Unimplemented, fall-through to bytecode implementation. 4613 // Unimplemented, fall-through to bytecode implementation.
4537 #else // IA32 4614 #else // IA32
4538 RegExpMacroAssemblerIA32::Mode mode; 4615 RegExpMacroAssemblerIA32::Mode mode;
4539 if (is_ascii) { 4616 if (is_ascii) {
4540 mode = RegExpMacroAssemblerIA32::ASCII; 4617 mode = RegExpMacroAssemblerIA32::ASCII;
4541 } else { 4618 } else {
4542 mode = RegExpMacroAssemblerIA32::UC16; 4619 mode = RegExpMacroAssemblerIA32::UC16;
4543 } 4620 }
4544 RegExpMacroAssemblerIA32 macro_assembler(mode, 4621 RegExpMacroAssemblerIA32 macro_assembler(mode,
4545 (data->capture_count + 1) * 2); 4622 (data->capture_count + 1) * 2);
4546 return compiler.Assemble(&macro_assembler, 4623 return compiler.Assemble(&macro_assembler,
4547 node, 4624 node,
4548 data->capture_count, 4625 data->capture_count,
4549 pattern); 4626 pattern);
4550 #endif 4627 #endif
4551 } 4628 }
4552 EmbeddedVector<byte, 1024> codes; 4629 EmbeddedVector<byte, 1024> codes;
4553 RegExpMacroAssemblerIrregexp macro_assembler(codes); 4630 RegExpMacroAssemblerIrregexp macro_assembler(codes);
4554 return compiler.Assemble(&macro_assembler, 4631 return compiler.Assemble(&macro_assembler,
4555 node, 4632 node,
4556 data->capture_count, 4633 data->capture_count,
4557 pattern); 4634 pattern);
4558 } 4635 }
4559 4636
4560 4637
4561 }} // namespace v8::internal 4638 }} // namespace v8::internal
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698