OLD | NEW |
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2009 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 1413 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1424 } | 1424 } |
1425 | 1425 |
1426 | 1426 |
1427 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, | 1427 static void EmitCharClass(RegExpMacroAssembler* macro_assembler, |
1428 RegExpCharacterClass* cc, | 1428 RegExpCharacterClass* cc, |
1429 bool ascii, | 1429 bool ascii, |
1430 Label* on_failure, | 1430 Label* on_failure, |
1431 int cp_offset, | 1431 int cp_offset, |
1432 bool check_offset, | 1432 bool check_offset, |
1433 bool preloaded) { | 1433 bool preloaded) { |
1434 if (cc->is_standard() && | |
1435 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), | |
1436 cp_offset, | |
1437 check_offset, | |
1438 on_failure)) { | |
1439 return; | |
1440 } | |
1441 | |
1442 ZoneList<CharacterRange>* ranges = cc->ranges(); | 1434 ZoneList<CharacterRange>* ranges = cc->ranges(); |
1443 int max_char; | 1435 int max_char; |
1444 if (ascii) { | 1436 if (ascii) { |
1445 max_char = String::kMaxAsciiCharCode; | 1437 max_char = String::kMaxAsciiCharCode; |
1446 } else { | 1438 } else { |
1447 max_char = String::kMaxUC16CharCode; | 1439 max_char = String::kMaxUC16CharCode; |
1448 } | 1440 } |
1449 | 1441 |
1450 Label success; | 1442 Label success; |
1451 | 1443 |
(...skipping 30 matching lines...) Expand all Loading... |
1482 if (check_offset) { | 1474 if (check_offset) { |
1483 macro_assembler->CheckPosition(cp_offset, on_failure); | 1475 macro_assembler->CheckPosition(cp_offset, on_failure); |
1484 } | 1476 } |
1485 return; | 1477 return; |
1486 } | 1478 } |
1487 | 1479 |
1488 if (!preloaded) { | 1480 if (!preloaded) { |
1489 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); | 1481 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); |
1490 } | 1482 } |
1491 | 1483 |
| 1484 if (cc->is_standard() && |
| 1485 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), |
| 1486 on_failure)) { |
| 1487 return; |
| 1488 } |
| 1489 |
1492 for (int i = 0; i < last_valid_range; i++) { | 1490 for (int i = 0; i < last_valid_range; i++) { |
1493 CharacterRange& range = ranges->at(i); | 1491 CharacterRange& range = ranges->at(i); |
1494 Label next_range; | 1492 Label next_range; |
1495 uc16 from = range.from(); | 1493 uc16 from = range.from(); |
1496 uc16 to = range.to(); | 1494 uc16 to = range.to(); |
1497 if (from > max_char) { | 1495 if (from > max_char) { |
1498 continue; | 1496 continue; |
1499 } | 1497 } |
1500 if (to > max_char) to = max_char; | 1498 if (to > max_char) to = max_char; |
1501 if (to == from) { | 1499 if (to == from) { |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1619 | 1617 |
1620 int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) { | 1618 int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) { |
1621 int answer = Length(); | 1619 int answer = Length(); |
1622 if (answer >= still_to_find) return answer; | 1620 if (answer >= still_to_find) return answer; |
1623 if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer; | 1621 if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer; |
1624 return answer + on_success()->EatsAtLeast(still_to_find - answer, | 1622 return answer + on_success()->EatsAtLeast(still_to_find - answer, |
1625 recursion_depth + 1); | 1623 recursion_depth + 1); |
1626 } | 1624 } |
1627 | 1625 |
1628 | 1626 |
1629 int NegativeLookaheadChoiceNode:: EatsAtLeast(int still_to_find, | 1627 int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, |
1630 int recursion_depth) { | 1628 int recursion_depth) { |
1631 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; | 1629 if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; |
1632 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 1630 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
1633 // afterwards. | 1631 // afterwards. |
1634 RegExpNode* node = alternatives_->at(1).node(); | 1632 RegExpNode* node = alternatives_->at(1).node(); |
1635 return node->EatsAtLeast(still_to_find, recursion_depth + 1); | 1633 return node->EatsAtLeast(still_to_find, recursion_depth + 1); |
1636 } | 1634 } |
1637 | 1635 |
1638 | 1636 |
1639 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( | 1637 void NegativeLookaheadChoiceNode::GetQuickCheckDetails( |
1640 QuickCheckDetails* details, | 1638 QuickCheckDetails* details, |
(...skipping 401 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2042 details->Merge(&new_details, characters_filled_in); | 2040 details->Merge(&new_details, characters_filled_in); |
2043 } | 2041 } |
2044 } | 2042 } |
2045 | 2043 |
2046 | 2044 |
2047 // Check for [0-9A-Z_a-z]. | 2045 // Check for [0-9A-Z_a-z]. |
2048 static void EmitWordCheck(RegExpMacroAssembler* assembler, | 2046 static void EmitWordCheck(RegExpMacroAssembler* assembler, |
2049 Label* word, | 2047 Label* word, |
2050 Label* non_word, | 2048 Label* non_word, |
2051 bool fall_through_on_word) { | 2049 bool fall_through_on_word) { |
| 2050 if (assembler->CheckSpecialCharacterClass( |
| 2051 fall_through_on_word ? 'w' : 'W', |
| 2052 fall_through_on_word ? non_word : word)) { |
| 2053 // Optimized implementation available. |
| 2054 return; |
| 2055 } |
2052 assembler->CheckCharacterGT('z', non_word); | 2056 assembler->CheckCharacterGT('z', non_word); |
2053 assembler->CheckCharacterLT('0', non_word); | 2057 assembler->CheckCharacterLT('0', non_word); |
2054 assembler->CheckCharacterGT('a' - 1, word); | 2058 assembler->CheckCharacterGT('a' - 1, word); |
2055 assembler->CheckCharacterLT('9' + 1, word); | 2059 assembler->CheckCharacterLT('9' + 1, word); |
2056 assembler->CheckCharacterLT('A', non_word); | 2060 assembler->CheckCharacterLT('A', non_word); |
2057 assembler->CheckCharacterLT('Z' + 1, word); | 2061 assembler->CheckCharacterLT('Z' + 1, word); |
2058 if (fall_through_on_word) { | 2062 if (fall_through_on_word) { |
2059 assembler->CheckNotCharacter('_', non_word); | 2063 assembler->CheckNotCharacter('_', non_word); |
2060 } else { | 2064 } else { |
2061 assembler->CheckCharacter('_', word); | 2065 assembler->CheckCharacter('_', word); |
(...skipping 16 matching lines...) Expand all Loading... |
2078 if (new_trace.cp_offset() == 0) { | 2082 if (new_trace.cp_offset() == 0) { |
2079 // The start of input counts as a newline in this context, so skip to | 2083 // The start of input counts as a newline in this context, so skip to |
2080 // ok if we are at the start. | 2084 // ok if we are at the start. |
2081 assembler->CheckAtStart(&ok); | 2085 assembler->CheckAtStart(&ok); |
2082 } | 2086 } |
2083 // We already checked that we are not at the start of input so it must be | 2087 // We already checked that we are not at the start of input so it must be |
2084 // OK to load the previous character. | 2088 // OK to load the previous character. |
2085 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, | 2089 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, |
2086 new_trace.backtrack(), | 2090 new_trace.backtrack(), |
2087 false); | 2091 false); |
2088 // Newline means \n, \r, 0x2028 or 0x2029. | 2092 if (!assembler->CheckSpecialCharacterClass('n', |
2089 if (!compiler->ascii()) { | 2093 new_trace.backtrack())) { |
2090 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); | 2094 // Newline means \n, \r, 0x2028 or 0x2029. |
| 2095 if (!compiler->ascii()) { |
| 2096 assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); |
| 2097 } |
| 2098 assembler->CheckCharacter('\n', &ok); |
| 2099 assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
2091 } | 2100 } |
2092 assembler->CheckCharacter('\n', &ok); | |
2093 assembler->CheckNotCharacter('\r', new_trace.backtrack()); | |
2094 assembler->Bind(&ok); | 2101 assembler->Bind(&ok); |
2095 on_success->Emit(compiler, &new_trace); | 2102 on_success->Emit(compiler, &new_trace); |
2096 } | 2103 } |
2097 | 2104 |
2098 | 2105 |
| 2106 // Emit the code to handle \b and \B (word-boundary or non-word-boundary) |
| 2107 // when we know whether the next character must be a word character or not. |
| 2108 static void EmitHalfBoundaryCheck(AssertionNode::AssertionNodeType type, |
| 2109 RegExpCompiler* compiler, |
| 2110 RegExpNode* on_success, |
| 2111 Trace* trace) { |
| 2112 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 2113 Label done; |
| 2114 |
| 2115 Trace new_trace(*trace); |
| 2116 |
| 2117 bool expect_word_character = (type == AssertionNode::AFTER_WORD_CHARACTER); |
| 2118 Label* on_word = expect_word_character ? &done : new_trace.backtrack(); |
| 2119 Label* on_non_word = expect_word_character ? new_trace.backtrack() : &done; |
| 2120 |
| 2121 // Check whether previous character was a word character. |
| 2122 switch(trace->at_start()) { |
| 2123 case Trace::TRUE: |
| 2124 if (expect_word_character) { |
| 2125 assembler->GoTo(on_non_word); |
| 2126 } |
| 2127 break; |
| 2128 case Trace::UNKNOWN: |
| 2129 ASSERT_EQ(0, trace->cp_offset()); |
| 2130 assembler->CheckAtStart(on_non_word); |
| 2131 // Fall through. |
| 2132 case Trace::FALSE: |
| 2133 int prev_char_offset = trace->cp_offset() - 1; |
| 2134 assembler->LoadCurrentCharacter(prev_char_offset, NULL, false, 1); |
| 2135 EmitWordCheck(assembler, on_word, on_non_word, expect_word_character); |
| 2136 // We may or may not have loaded the previous character. |
| 2137 new_trace.InvalidateCurrentCharacter(); |
| 2138 } |
| 2139 |
| 2140 assembler->Bind(&done); |
| 2141 |
| 2142 on_success->Emit(compiler, &new_trace); |
| 2143 } |
| 2144 |
| 2145 |
2099 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). | 2146 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
2100 static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type, | 2147 static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type, |
2101 RegExpCompiler* compiler, | 2148 RegExpCompiler* compiler, |
2102 RegExpNode* on_success, | 2149 RegExpNode* on_success, |
2103 Trace* trace) { | 2150 Trace* trace) { |
2104 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 2151 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
2105 Label before_non_word; | 2152 Label before_non_word; |
2106 Label before_word; | 2153 Label before_word; |
2107 if (trace->characters_preloaded() != 1) { | 2154 if (trace->characters_preloaded() != 1) { |
2108 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); | 2155 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word); |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2198 Trace at_start_trace = *trace; | 2245 Trace at_start_trace = *trace; |
2199 at_start_trace.set_at_start(true); | 2246 at_start_trace.set_at_start(true); |
2200 on_success()->Emit(compiler, &at_start_trace); | 2247 on_success()->Emit(compiler, &at_start_trace); |
2201 return; | 2248 return; |
2202 } | 2249 } |
2203 } | 2250 } |
2204 break; | 2251 break; |
2205 case AFTER_NEWLINE: | 2252 case AFTER_NEWLINE: |
2206 EmitHat(compiler, on_success(), trace); | 2253 EmitHat(compiler, on_success(), trace); |
2207 return; | 2254 return; |
2208 case AT_NON_BOUNDARY: | |
2209 case AT_BOUNDARY: | 2255 case AT_BOUNDARY: |
| 2256 case AT_NON_BOUNDARY: { |
2210 EmitBoundaryCheck(type_, compiler, on_success(), trace); | 2257 EmitBoundaryCheck(type_, compiler, on_success(), trace); |
2211 return; | 2258 return; |
| 2259 } |
| 2260 case AFTER_WORD_CHARACTER: |
| 2261 case AFTER_NONWORD_CHARACTER: { |
| 2262 EmitHalfBoundaryCheck(type_, compiler, on_success(), trace); |
| 2263 } |
2212 } | 2264 } |
2213 on_success()->Emit(compiler, trace); | 2265 on_success()->Emit(compiler, trace); |
2214 } | 2266 } |
2215 | 2267 |
2216 | 2268 |
2217 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) { | 2269 static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) { |
2218 if (quick_check == NULL) return false; | 2270 if (quick_check == NULL) return false; |
2219 if (offset >= quick_check->characters()) return false; | 2271 if (offset >= quick_check->characters()) return false; |
2220 return quick_check->positions(offset)->determines_perfectly; | 2272 return quick_check->positions(offset)->determines_perfectly; |
2221 } | 2273 } |
(...skipping 562 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2784 } | 2836 } |
2785 continue; | 2837 continue; |
2786 } else { | 2838 } else { |
2787 // No quick check was generated. Put the full code here. | 2839 // No quick check was generated. Put the full code here. |
2788 // If this is not the first choice then there could be slow checks from | 2840 // If this is not the first choice then there could be slow checks from |
2789 // previous cases that go here when they fail. There's no reason to | 2841 // previous cases that go here when they fail. There's no reason to |
2790 // insist that they preload characters since the slow check we are about | 2842 // insist that they preload characters since the slow check we are about |
2791 // to generate probably can't use it. | 2843 // to generate probably can't use it. |
2792 if (i != first_normal_choice) { | 2844 if (i != first_normal_choice) { |
2793 alt_gen->expects_preload = false; | 2845 alt_gen->expects_preload = false; |
2794 new_trace.set_characters_preloaded(0); | 2846 new_trace.InvalidateCurrentCharacter(); |
2795 } | 2847 } |
2796 if (i < choice_count - 1) { | 2848 if (i < choice_count - 1) { |
2797 new_trace.set_backtrack(&alt_gen->after); | 2849 new_trace.set_backtrack(&alt_gen->after); |
2798 } | 2850 } |
2799 generate_full_check_inline = true; | 2851 generate_full_check_inline = true; |
2800 } | 2852 } |
2801 if (generate_full_check_inline) { | 2853 if (generate_full_check_inline) { |
2802 if (new_trace.actions() != NULL) { | 2854 if (new_trace.actions() != NULL) { |
2803 new_trace.set_flush_budget(new_flush_budget); | 2855 new_trace.set_flush_budget(new_flush_budget); |
2804 } | 2856 } |
(...skipping 470 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3275 break; | 3327 break; |
3276 case AssertionNode::AT_BOUNDARY: | 3328 case AssertionNode::AT_BOUNDARY: |
3277 stream()->Add("label=\"\\b\", shape=septagon"); | 3329 stream()->Add("label=\"\\b\", shape=septagon"); |
3278 break; | 3330 break; |
3279 case AssertionNode::AT_NON_BOUNDARY: | 3331 case AssertionNode::AT_NON_BOUNDARY: |
3280 stream()->Add("label=\"\\B\", shape=septagon"); | 3332 stream()->Add("label=\"\\B\", shape=septagon"); |
3281 break; | 3333 break; |
3282 case AssertionNode::AFTER_NEWLINE: | 3334 case AssertionNode::AFTER_NEWLINE: |
3283 stream()->Add("label=\"(?<=\\n)\", shape=septagon"); | 3335 stream()->Add("label=\"(?<=\\n)\", shape=septagon"); |
3284 break; | 3336 break; |
| 3337 case AssertionNode::AFTER_WORD_CHARACTER: |
| 3338 stream()->Add("label=\"(?<=\\w)\", shape=septagon"); |
| 3339 break; |
| 3340 case AssertionNode::AFTER_NONWORD_CHARACTER: |
| 3341 stream()->Add("label=\"(?<=\\W)\", shape=septagon"); |
| 3342 break; |
3285 } | 3343 } |
3286 stream()->Add("];\n"); | 3344 stream()->Add("];\n"); |
3287 PrintAttributes(that); | 3345 PrintAttributes(that); |
3288 RegExpNode* successor = that->on_success(); | 3346 RegExpNode* successor = that->on_success(); |
3289 stream()->Add(" n%p -> n%p;\n", that, successor); | 3347 stream()->Add(" n%p -> n%p;\n", that, successor); |
3290 Visit(successor); | 3348 Visit(successor); |
3291 } | 3349 } |
3292 | 3350 |
3293 | 3351 |
3294 void DotPrinter::VisitAction(ActionNode* that) { | 3352 void DotPrinter::VisitAction(ActionNode* that) { |
(...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3477 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { | 3535 if (CompareInverseRanges(set_.ranges(), kSpaceRanges, kSpaceRangeCount)) { |
3478 set_.set_standard_set_type('S'); | 3536 set_.set_standard_set_type('S'); |
3479 return true; | 3537 return true; |
3480 } | 3538 } |
3481 if (CompareInverseRanges(set_.ranges(), | 3539 if (CompareInverseRanges(set_.ranges(), |
3482 kLineTerminatorRanges, | 3540 kLineTerminatorRanges, |
3483 kLineTerminatorRangeCount)) { | 3541 kLineTerminatorRangeCount)) { |
3484 set_.set_standard_set_type('.'); | 3542 set_.set_standard_set_type('.'); |
3485 return true; | 3543 return true; |
3486 } | 3544 } |
| 3545 if (CompareRanges(set_.ranges(), |
| 3546 kLineTerminatorRanges, |
| 3547 kLineTerminatorRangeCount)) { |
| 3548 set_.set_standard_set_type('n'); |
| 3549 return true; |
| 3550 } |
| 3551 if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
| 3552 set_.set_standard_set_type('w'); |
| 3553 return true; |
| 3554 } |
| 3555 if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { |
| 3556 set_.set_standard_set_type('W'); |
| 3557 return true; |
| 3558 } |
3487 return false; | 3559 return false; |
3488 } | 3560 } |
3489 | 3561 |
3490 | 3562 |
3491 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, | 3563 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, |
3492 RegExpNode* on_success) { | 3564 RegExpNode* on_success) { |
3493 return new TextNode(this, on_success); | 3565 return new TextNode(this, on_success); |
3494 } | 3566 } |
3495 | 3567 |
3496 | 3568 |
(...skipping 506 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4003 start = pos = block_end + 1; | 4075 start = pos = block_end + 1; |
4004 } | 4076 } |
4005 } else { | 4077 } else { |
4006 // Unibrow ranges don't work for high characters due to the "2^11 bug". | 4078 // Unibrow ranges don't work for high characters due to the "2^11 bug". |
4007 // Therefore we do something dumber for these ranges. | 4079 // Therefore we do something dumber for these ranges. |
4008 AddUncanonicals(ranges, bottom, top); | 4080 AddUncanonicals(ranges, bottom, top); |
4009 } | 4081 } |
4010 } | 4082 } |
4011 | 4083 |
4012 | 4084 |
| 4085 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) { |
| 4086 ASSERT_NOT_NULL(ranges); |
| 4087 int n = ranges->length(); |
| 4088 if (n <= 1) return true; |
| 4089 int max = ranges->at(0).to(); |
| 4090 for (int i = 1; i < n; i++) { |
| 4091 CharacterRange next_range = ranges->at(i); |
| 4092 if (next_range.from() <= max + 1) return false; |
| 4093 max = next_range.to(); |
| 4094 } |
| 4095 return true; |
| 4096 } |
| 4097 |
| 4098 SetRelation CharacterRange::WordCharacterRelation( |
| 4099 ZoneList<CharacterRange>* range) { |
| 4100 ASSERT(IsCanonical(range)); |
| 4101 int i = 0; // Word character range index. |
| 4102 int j = 0; // Argument range index. |
| 4103 ASSERT_NE(0, kWordRangeCount); |
| 4104 SetRelation result; |
| 4105 if (range->length() == 0) { |
| 4106 result.SetElementsInSecondSet(); |
| 4107 return result; |
| 4108 } |
| 4109 CharacterRange argument_range = range->at(0); |
| 4110 CharacterRange word_range = CharacterRange(kWordRanges[0], kWordRanges[1]); |
| 4111 while (i < kWordRangeCount && j < range->length()) { |
| 4112 // Check the two ranges for the five cases: |
| 4113 // - no overlap. |
| 4114 // - partial overlap (there are elements in both ranges that isn't |
| 4115 // in the other, and there are also elements that are in both). |
| 4116 // - argument range entirely inside word range. |
| 4117 // - word range entirely inside argument range. |
| 4118 // - ranges are completely equal. |
| 4119 |
| 4120 // First check for no overlap. The earlier range is not in the other set. |
| 4121 if (argument_range.from() > word_range.to()) { |
| 4122 // Ranges are disjoint. The earlier word range contains elements that |
| 4123 // cannot be in the argument set. |
| 4124 result.SetElementsInSecondSet(); |
| 4125 } else if (word_range.from() > argument_range.to()) { |
| 4126 // Ranges are disjoint. The earlier argument range contains elements that |
| 4127 // cannot be in the word set. |
| 4128 result.SetElementsInFirstSet(); |
| 4129 } else if (word_range.from() <= argument_range.from() && |
| 4130 word_range.to() >= argument_range.from()) { |
| 4131 result.SetElementsInBothSets(); |
| 4132 // argument range completely inside word range. |
| 4133 if (word_range.from() < argument_range.from() || |
| 4134 word_range.to() > argument_range.from()) { |
| 4135 result.SetElementsInSecondSet(); |
| 4136 } |
| 4137 } else if (word_range.from() >= argument_range.from() && |
| 4138 word_range.to() <= argument_range.from()) { |
| 4139 result.SetElementsInBothSets(); |
| 4140 result.SetElementsInFirstSet(); |
| 4141 } else { |
| 4142 // There is overlap, and neither is a subrange of the other |
| 4143 result.SetElementsInFirstSet(); |
| 4144 result.SetElementsInSecondSet(); |
| 4145 result.SetElementsInBothSets(); |
| 4146 } |
| 4147 if (result.NonTrivialIntersection()) { |
| 4148 // The result is as (im)precise as we can possibly make it. |
| 4149 return result; |
| 4150 } |
| 4151 // Progress the range(s) with minimal to-character. |
| 4152 uc16 word_to = word_range.to(); |
| 4153 uc16 argument_to = argument_range.to(); |
| 4154 if (argument_to <= word_to) { |
| 4155 j++; |
| 4156 if (j < range->length()) { |
| 4157 argument_range = range->at(j); |
| 4158 } |
| 4159 } |
| 4160 if (word_to <= argument_to) { |
| 4161 i += 2; |
| 4162 if (i < kWordRangeCount) { |
| 4163 word_range = CharacterRange(kWordRanges[i], kWordRanges[i + 1]); |
| 4164 } |
| 4165 } |
| 4166 } |
| 4167 // Check if anything wasn't compared in the loop. |
| 4168 if (i < kWordRangeCount) { |
| 4169 // word range contains something not in argument range. |
| 4170 result.SetElementsInSecondSet(); |
| 4171 } else if (j < range->length()){ |
| 4172 // Argument range contains something not in word range. |
| 4173 result.SetElementsInFirstSet(); |
| 4174 } |
| 4175 |
| 4176 return result; |
| 4177 } |
| 4178 |
| 4179 |
4013 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, | 4180 static void AddUncanonicals(ZoneList<CharacterRange>* ranges, |
4014 int bottom, | 4181 int bottom, |
4015 int top) { | 4182 int top) { |
4016 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 4183 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
4017 // Zones with no case mappings. There is a DEBUG-mode loop to assert that | 4184 // Zones with no case mappings. There is a DEBUG-mode loop to assert that |
4018 // this table is correct. | 4185 // this table is correct. |
4019 // 0x0600 - 0x0fff | 4186 // 0x0600 - 0x0fff |
4020 // 0x1100 - 0x1cff | 4187 // 0x1100 - 0x1cff |
4021 // 0x2000 - 0x20ff | 4188 // 0x2000 - 0x20ff |
4022 // 0x2200 - 0x23ff | 4189 // 0x2200 - 0x23ff |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4112 | 4279 |
4113 ZoneList<CharacterRange>* CharacterSet::ranges() { | 4280 ZoneList<CharacterRange>* CharacterSet::ranges() { |
4114 if (ranges_ == NULL) { | 4281 if (ranges_ == NULL) { |
4115 ranges_ = new ZoneList<CharacterRange>(2); | 4282 ranges_ = new ZoneList<CharacterRange>(2); |
4116 CharacterRange::AddClassEscape(standard_set_type_, ranges_); | 4283 CharacterRange::AddClassEscape(standard_set_type_, ranges_); |
4117 } | 4284 } |
4118 return ranges_; | 4285 return ranges_; |
4119 } | 4286 } |
4120 | 4287 |
4121 | 4288 |
| 4289 // Move a number of elements in a zonelist to another position |
| 4290 // in the same list. Handles overlapping source and target areas. |
| 4291 static void MoveRanges(ZoneList<CharacterRange>* list, |
| 4292 int from, |
| 4293 int to, |
| 4294 int count) { |
| 4295 // Ranges are potentially overlapping. |
| 4296 if (from < to) { |
| 4297 for (int i = count - 1; i >= 0; i--) { |
| 4298 list->at(to + i) = list->at(from + i); |
| 4299 } |
| 4300 } else { |
| 4301 for (int i = 0; i < count; i++) { |
| 4302 list->at(to + i) = list->at(from + i); |
| 4303 } |
| 4304 } |
| 4305 } |
| 4306 |
| 4307 |
| 4308 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, |
| 4309 int count, |
| 4310 CharacterRange insert) { |
| 4311 // Inserts a range into list[0..count[, which must be sorted |
| 4312 // by from value and non-overlapping and non-adjacent, using at most |
| 4313 // list[0..count] for the result. Returns the number of resulting |
| 4314 // canonicalized ranges. Inserting a range may collapse existing ranges into |
| 4315 // fewer ranges, so the return value can be anything in the range 1..count+1. |
| 4316 uc16 from = insert.from(); |
| 4317 uc16 to = insert.to(); |
| 4318 int start_pos = 0; |
| 4319 int end_pos = count; |
| 4320 for (int i = count - 1; i >= 0; i--) { |
| 4321 CharacterRange current = list->at(i); |
| 4322 if (current.from() > to + 1) { |
| 4323 end_pos = i; |
| 4324 } else if (current.to() + 1 < from) { |
| 4325 start_pos = i + 1; |
| 4326 break; |
| 4327 } |
| 4328 } |
| 4329 |
| 4330 // Inserted range overlaps, or is adjacent to, ranges at positions |
| 4331 // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are |
| 4332 // not affected by the insertion. |
| 4333 // If start_pos == end_pos, the range must be inserted before start_pos. |
| 4334 // if start_pos < end_pos, the entire range from start_pos to end_pos |
| 4335 // must be merged with the insert range. |
| 4336 |
| 4337 if (start_pos == end_pos) { |
| 4338 // Insert between existing ranges at position start_pos. |
| 4339 if (start_pos < count) { |
| 4340 MoveRanges(list, start_pos, start_pos + 1, count - start_pos); |
| 4341 } |
| 4342 list->at(start_pos) = insert; |
| 4343 return count + 1; |
| 4344 } |
| 4345 if (start_pos + 1 == end_pos) { |
| 4346 // Replace single existing range at position start_pos. |
| 4347 CharacterRange to_replace = list->at(start_pos); |
| 4348 int new_from = Min(to_replace.from(), from); |
| 4349 int new_to = Max(to_replace.to(), to); |
| 4350 list->at(start_pos) = CharacterRange(new_from, new_to); |
| 4351 return count; |
| 4352 } |
| 4353 // Replace a number of existing ranges from start_pos to end_pos - 1. |
| 4354 // Move the remaining ranges down. |
| 4355 |
| 4356 int new_from = Min(list->at(start_pos).from(), from); |
| 4357 int new_to = Max(list->at(end_pos - 1).to(), to); |
| 4358 if (end_pos < count) { |
| 4359 MoveRanges(list, end_pos, start_pos + 1, count - end_pos); |
| 4360 } |
| 4361 list->at(start_pos) = CharacterRange(new_from, new_to); |
| 4362 return count - (end_pos - start_pos) + 1; |
| 4363 } |
| 4364 |
| 4365 |
| 4366 void CharacterSet::Canonicalize() { |
| 4367 // Special/default classes are always considered canonical. The result |
| 4368 // of calling ranges() will be sorted. |
| 4369 if (ranges_ == NULL) return; |
| 4370 CharacterRange::Canonicalize(ranges_); |
| 4371 } |
| 4372 |
| 4373 |
| 4374 void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) { |
| 4375 if (character_ranges->length() <= 1) return; |
| 4376 // Check whether ranges are already canonical (increasing, non-overlapping, |
| 4377 // non-adjacent). |
| 4378 int n = character_ranges->length(); |
| 4379 int max = character_ranges->at(0).to(); |
| 4380 int i = 1; |
| 4381 while (i < n) { |
| 4382 CharacterRange current = character_ranges->at(i); |
| 4383 if (current.from() <= max + 1) { |
| 4384 break; |
| 4385 } |
| 4386 max = current.to(); |
| 4387 i++; |
| 4388 } |
| 4389 // Canonical until the i'th range. If that's all of them, we are done. |
| 4390 if (i == n) return; |
| 4391 |
| 4392 // The ranges at index i and forward are not canonicalized. Make them so by |
| 4393 // doing the equivalent of insertion sort (inserting each into the previous |
| 4394 // list, in order). |
| 4395 // Notice that inserting a range can reduce the number of ranges in the |
| 4396 // result due to combining of adjacent and overlapping ranges. |
| 4397 int read = i; // Range to insert. |
| 4398 int num_canonical = i; // Length of canonicalized part of list. |
| 4399 do { |
| 4400 num_canonical = InsertRangeInCanonicalList(character_ranges, |
| 4401 num_canonical, |
| 4402 character_ranges->at(read)); |
| 4403 read++; |
| 4404 } while (read < n); |
| 4405 character_ranges->Rewind(num_canonical); |
| 4406 |
| 4407 ASSERT(CharacterRange::IsCanonical(character_ranges)); |
| 4408 } |
| 4409 |
| 4410 |
| 4411 // Utility function for CharacterRange::Merge. Adds a range at the end of |
| 4412 // a canonicalized range list, if necessary merging the range with the last |
| 4413 // range of the list. |
| 4414 static void AddRangeToSet(ZoneList<CharacterRange>* set, CharacterRange range) { |
| 4415 if (set == NULL) return; |
| 4416 ASSERT(set->length() == 0 || set->at(set->length() - 1).to() < range.from()); |
| 4417 int n = set->length(); |
| 4418 if (n > 0) { |
| 4419 CharacterRange lastRange = set->at(n - 1); |
| 4420 if (lastRange.to() == range.from() - 1) { |
| 4421 set->at(n - 1) = CharacterRange(lastRange.from(), range.to()); |
| 4422 return; |
| 4423 } |
| 4424 } |
| 4425 set->Add(range); |
| 4426 } |
| 4427 |
| 4428 |
| 4429 static void AddRangeToSelectedSet(int selector, |
| 4430 ZoneList<CharacterRange>* first_set, |
| 4431 ZoneList<CharacterRange>* second_set, |
| 4432 ZoneList<CharacterRange>* intersection_set, |
| 4433 CharacterRange range) { |
| 4434 switch (selector) { |
| 4435 case kInsideFirst: |
| 4436 AddRangeToSet(first_set, range); |
| 4437 break; |
| 4438 case kInsideSecond: |
| 4439 AddRangeToSet(second_set, range); |
| 4440 break; |
| 4441 case kInsideBoth: |
| 4442 AddRangeToSet(intersection_set, range); |
| 4443 break; |
| 4444 } |
| 4445 } |
| 4446 |
| 4447 |
| 4448 |
| 4449 void CharacterRange::Merge(ZoneList<CharacterRange>* first_set, |
| 4450 ZoneList<CharacterRange>* second_set, |
| 4451 ZoneList<CharacterRange>* first_set_only_out, |
| 4452 ZoneList<CharacterRange>* second_set_only_out, |
| 4453 ZoneList<CharacterRange>* both_sets_out) { |
| 4454 // Inputs are canonicalized. |
| 4455 ASSERT(CharacterRange::IsCanonical(first_set)); |
| 4456 ASSERT(CharacterRange::IsCanonical(second_set)); |
| 4457 // Outputs are empty, if applicable. |
| 4458 ASSERT(first_set_only_out == NULL || first_set_only_out->length() == 0); |
| 4459 ASSERT(second_set_only_out == NULL || second_set_only_out->length() == 0); |
| 4460 ASSERT(both_sets_out == NULL || both_sets_out->length() == 0); |
| 4461 |
| 4462 // Merge sets by iterating through the lists in order of lowest "from" value, |
| 4463 // and putting intervals into one of three sets. |
| 4464 |
| 4465 if (first_set->length() == 0) { |
| 4466 second_set_only_out->AddAll(*second_set); |
| 4467 return; |
| 4468 } |
| 4469 if (second_set->length() == 0) { |
| 4470 first_set_only_out->AddAll(*first_set); |
| 4471 return; |
| 4472 } |
| 4473 // Indices into input lists. |
| 4474 int i1 = 0; |
| 4475 int i2 = 0; |
| 4476 // Cache length of input lists. |
| 4477 int n1 = first_set->length(); |
| 4478 int n2 = second_set->length(); |
| 4479 // Current range. May be invalid if state is kInsideNone. |
| 4480 int from = 0; |
| 4481 int to = -1; |
| 4482 // Where current range comes from. |
| 4483 int state = kInsideNone; |
| 4484 |
| 4485 while (i1 < n1 || i2 < n2) { |
| 4486 CharacterRange next_range; |
| 4487 int range_source; |
| 4488 if (i2 == n2 || first_set->at(i1).from() < second_set->at(i2).from()) { |
| 4489 next_range = first_set->at(i1++); |
| 4490 range_source = kInsideFirst; |
| 4491 } else { |
| 4492 next_range = second_set->at(i2++); |
| 4493 range_source = kInsideSecond; |
| 4494 } |
| 4495 if (to < next_range.from()) { |
| 4496 // Ranges disjoint: |current| |next| |
| 4497 AddRangeToSelectedSet(state, |
| 4498 first_set_only_out, |
| 4499 second_set_only_out, |
| 4500 both_sets_out, |
| 4501 CharacterRange(from, to)); |
| 4502 from = next_range.from(); |
| 4503 to = next_range.to(); |
| 4504 state = range_source; |
| 4505 } else { |
| 4506 if (from < next_range.from()) { |
| 4507 AddRangeToSelectedSet(state, |
| 4508 first_set_only_out, |
| 4509 second_set_only_out, |
| 4510 both_sets_out, |
| 4511 CharacterRange(from, next_range.from()-1)); |
| 4512 } |
| 4513 if (to < next_range.to()) { |
| 4514 // Ranges overlap: |current| |
| 4515 // |next| |
| 4516 AddRangeToSelectedSet(state | range_source, |
| 4517 first_set_only_out, |
| 4518 second_set_only_out, |
| 4519 both_sets_out, |
| 4520 CharacterRange(next_range.from(), to)); |
| 4521 from = to + 1; |
| 4522 to = next_range.to(); |
| 4523 state = range_source; |
| 4524 } else { |
| 4525 // Range included: |current| , possibly ending at same character. |
| 4526 // |next| |
| 4527 AddRangeToSelectedSet( |
| 4528 state | range_source, |
| 4529 first_set_only_out, |
| 4530 second_set_only_out, |
| 4531 both_sets_out, |
| 4532 CharacterRange(next_range.from(), next_range.to())); |
| 4533 from = next_range.to() + 1; |
| 4534 // If ranges end at same character, both ranges are consumed completely. |
| 4535 if (next_range.to() == to) state = kInsideNone; |
| 4536 } |
| 4537 } |
| 4538 } |
| 4539 AddRangeToSelectedSet(state, |
| 4540 first_set_only_out, |
| 4541 second_set_only_out, |
| 4542 both_sets_out, |
| 4543 CharacterRange(from, to)); |
| 4544 } |
| 4545 |
| 4546 |
| 4547 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges, |
| 4548 ZoneList<CharacterRange>* negated_ranges) { |
| 4549 ASSERT(CharacterRange::IsCanonical(ranges)); |
| 4550 ASSERT_EQ(0, negated_ranges->length()); |
| 4551 int range_count = ranges->length(); |
| 4552 uc16 from = 0; |
| 4553 int i = 0; |
| 4554 if (range_count > 0 && ranges->at(0).from() == 0) { |
| 4555 from = ranges->at(0).to(); |
| 4556 i = 1; |
| 4557 } |
| 4558 while (i < range_count) { |
| 4559 CharacterRange range = ranges->at(i); |
| 4560 negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); |
| 4561 from = range.to(); |
| 4562 i++; |
| 4563 } |
| 4564 if (from < String::kMaxUC16CharCode) { |
| 4565 negated_ranges->Add(CharacterRange(from + 1, String::kMaxUC16CharCode)); |
| 4566 } |
| 4567 } |
| 4568 |
| 4569 |
4122 | 4570 |
4123 // ------------------------------------------------------------------- | 4571 // ------------------------------------------------------------------- |
4124 // Interest propagation | 4572 // Interest propagation |
4125 | 4573 |
4126 | 4574 |
4127 RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) { | 4575 RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) { |
4128 for (int i = 0; i < siblings_.length(); i++) { | 4576 for (int i = 0; i < siblings_.length(); i++) { |
4129 RegExpNode* sibling = siblings_.Get(i); | 4577 RegExpNode* sibling = siblings_.Get(i); |
4130 if (sibling->info()->Matches(info)) | 4578 if (sibling->info()->Matches(info)) |
4131 return sibling; | 4579 return sibling; |
(...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4403 } | 4851 } |
4404 | 4852 |
4405 | 4853 |
4406 void Analysis::VisitBackReference(BackReferenceNode* that) { | 4854 void Analysis::VisitBackReference(BackReferenceNode* that) { |
4407 EnsureAnalyzed(that->on_success()); | 4855 EnsureAnalyzed(that->on_success()); |
4408 } | 4856 } |
4409 | 4857 |
4410 | 4858 |
4411 void Analysis::VisitAssertion(AssertionNode* that) { | 4859 void Analysis::VisitAssertion(AssertionNode* that) { |
4412 EnsureAnalyzed(that->on_success()); | 4860 EnsureAnalyzed(that->on_success()); |
4413 } | 4861 AssertionNode::AssertionNodeType type = that->type(); |
| 4862 if (type == AssertionNode::AT_BOUNDARY || |
| 4863 type == AssertionNode::AT_NON_BOUNDARY) { |
| 4864 // Check if the following character is known to be a word character |
| 4865 // or known to not be a word character. |
| 4866 ZoneList<CharacterRange>* following_chars = that->FirstCharacterSet(); |
| 4867 |
| 4868 CharacterRange::Canonicalize(following_chars); |
| 4869 |
| 4870 SetRelation word_relation = |
| 4871 CharacterRange::WordCharacterRelation(following_chars); |
| 4872 if (word_relation.ContainedIn()) { |
| 4873 // Following character is definitely a word character. |
| 4874 type = (type == AssertionNode::AT_BOUNDARY) ? |
| 4875 AssertionNode::AFTER_NONWORD_CHARACTER : |
| 4876 AssertionNode::AFTER_WORD_CHARACTER; |
| 4877 that->set_type(type); |
| 4878 } else if (word_relation.Disjoint()) { |
| 4879 // Following character is definitely *not* a word character. |
| 4880 type = (type == AssertionNode::AT_BOUNDARY) ? |
| 4881 AssertionNode::AFTER_WORD_CHARACTER : |
| 4882 AssertionNode::AFTER_NONWORD_CHARACTER; |
| 4883 that->set_type(type); |
| 4884 } |
| 4885 } |
| 4886 } |
| 4887 |
| 4888 |
| 4889 ZoneList<CharacterRange>* RegExpNode::FirstCharacterSet() { |
| 4890 if (first_character_set_ == NULL) { |
| 4891 if (ComputeFirstCharacterSet(kFirstCharBudget) < 0) { |
| 4892 // If we can't find an exact solution within the budget, we |
| 4893 // set the value to the set of every character, i.e., all characters |
| 4894 // are possible. |
| 4895 ZoneList<CharacterRange>* all_set = new ZoneList<CharacterRange>(1); |
| 4896 all_set->Add(CharacterRange::Everything()); |
| 4897 first_character_set_ = all_set; |
| 4898 } |
| 4899 } |
| 4900 return first_character_set_; |
| 4901 } |
| 4902 |
| 4903 |
| 4904 int RegExpNode::ComputeFirstCharacterSet(int budget) { |
| 4905 // Default behavior is to not be able to determine the first character. |
| 4906 return kComputeFirstCharacterSetFail; |
| 4907 } |
| 4908 |
| 4909 |
| 4910 int LoopChoiceNode::ComputeFirstCharacterSet(int budget) { |
| 4911 budget--; |
| 4912 if (budget >= 0) { |
| 4913 // Find loop min-iteration. It's the value of the guarded choice node |
| 4914 // with a GEQ guard, if any. |
| 4915 int min_repetition = 0; |
| 4916 |
| 4917 for (int i = 0; i <= 1; i++) { |
| 4918 GuardedAlternative alternative = alternatives()->at(i); |
| 4919 ZoneList<Guard*>* guards = alternative.guards(); |
| 4920 if (guards != NULL && guards->length() > 0) { |
| 4921 Guard* guard = guards->at(0); |
| 4922 if (guard->op() == Guard::GEQ) { |
| 4923 min_repetition = guard->value(); |
| 4924 break; |
| 4925 } |
| 4926 } |
| 4927 } |
| 4928 |
| 4929 budget = loop_node()->ComputeFirstCharacterSet(budget); |
| 4930 if (budget >= 0) { |
| 4931 ZoneList<CharacterRange>* character_set = |
| 4932 loop_node()->first_character_set(); |
| 4933 if (body_can_be_zero_length() || min_repetition == 0) { |
| 4934 budget = continue_node()->ComputeFirstCharacterSet(budget); |
| 4935 if (budget < 0) return budget; |
| 4936 ZoneList<CharacterRange>* body_set = |
| 4937 continue_node()->first_character_set(); |
| 4938 ZoneList<CharacterRange>* union_set = |
| 4939 new ZoneList<CharacterRange>(Max(character_set->length(), |
| 4940 body_set->length())); |
| 4941 CharacterRange::Merge(character_set, |
| 4942 body_set, |
| 4943 union_set, |
| 4944 union_set, |
| 4945 union_set); |
| 4946 character_set = union_set; |
| 4947 } |
| 4948 set_first_character_set(character_set); |
| 4949 } |
| 4950 } |
| 4951 return budget; |
| 4952 } |
| 4953 |
| 4954 |
| 4955 int NegativeLookaheadChoiceNode::ComputeFirstCharacterSet(int budget) { |
| 4956 budget--; |
| 4957 if (budget >= 0) { |
| 4958 GuardedAlternative successor = this->alternatives()->at(1); |
| 4959 RegExpNode* successor_node = successor.node(); |
| 4960 budget = successor_node->ComputeFirstCharacterSet(budget); |
| 4961 if (budget >= 0) { |
| 4962 set_first_character_set(successor_node->first_character_set()); |
| 4963 } |
| 4964 } |
| 4965 return budget; |
| 4966 } |
| 4967 |
| 4968 |
| 4969 // The first character set of an EndNode is unknowable. Just use the |
| 4970 // default implementation that fails and returns all characters as possible. |
| 4971 |
| 4972 |
| 4973 int AssertionNode::ComputeFirstCharacterSet(int budget) { |
| 4974 budget -= 1; |
| 4975 if (budget >= 0) { |
| 4976 switch (type_) { |
| 4977 case AT_END: { |
| 4978 set_first_character_set(new ZoneList<CharacterRange>(0)); |
| 4979 break; |
| 4980 } |
| 4981 case AT_START: |
| 4982 case AT_BOUNDARY: |
| 4983 case AT_NON_BOUNDARY: |
| 4984 case AFTER_NEWLINE: |
| 4985 case AFTER_NONWORD_CHARACTER: |
| 4986 case AFTER_WORD_CHARACTER: { |
| 4987 ASSERT_NOT_NULL(on_success()); |
| 4988 budget = on_success()->ComputeFirstCharacterSet(budget); |
| 4989 set_first_character_set(on_success()->first_character_set()); |
| 4990 break; |
| 4991 } |
| 4992 } |
| 4993 } |
| 4994 return budget; |
| 4995 } |
| 4996 |
| 4997 |
| 4998 int ActionNode::ComputeFirstCharacterSet(int budget) { |
| 4999 if (type_ == POSITIVE_SUBMATCH_SUCCESS) return kComputeFirstCharacterSetFail; |
| 5000 budget--; |
| 5001 if (budget >= 0) { |
| 5002 ASSERT_NOT_NULL(on_success()); |
| 5003 budget = on_success()->ComputeFirstCharacterSet(budget); |
| 5004 if (budget >= 0) { |
| 5005 set_first_character_set(on_success()->first_character_set()); |
| 5006 } |
| 5007 } |
| 5008 return budget; |
| 5009 } |
| 5010 |
| 5011 |
| 5012 int BackReferenceNode::ComputeFirstCharacterSet(int budget) { |
| 5013 // We don't know anything about the first character of a backreference |
| 5014 // at this point. |
| 5015 return kComputeFirstCharacterSetFail; |
| 5016 } |
| 5017 |
| 5018 |
| 5019 int TextNode::ComputeFirstCharacterSet(int budget) { |
| 5020 budget--; |
| 5021 if (budget >= 0) { |
| 5022 ASSERT_NE(0, elements()->length()); |
| 5023 TextElement text = elements()->at(0); |
| 5024 if (text.type == TextElement::ATOM) { |
| 5025 RegExpAtom* atom = text.data.u_atom; |
| 5026 ASSERT_NE(0, atom->length()); |
| 5027 uc16 first_char = atom->data()[0]; |
| 5028 ZoneList<CharacterRange>* range = new ZoneList<CharacterRange>(1); |
| 5029 range->Add(CharacterRange(first_char, first_char)); |
| 5030 set_first_character_set(range); |
| 5031 } else { |
| 5032 ASSERT(text.type == TextElement::CHAR_CLASS); |
| 5033 RegExpCharacterClass* char_class = text.data.u_char_class; |
| 5034 if (char_class->is_negated()) { |
| 5035 ZoneList<CharacterRange>* ranges = char_class->ranges(); |
| 5036 int length = ranges->length(); |
| 5037 int new_length = length + 1; |
| 5038 if (length > 0) { |
| 5039 if (ranges->at(0).from() == 0) new_length--; |
| 5040 if (ranges->at(length - 1).to() == String::kMaxUC16CharCode) { |
| 5041 new_length--; |
| 5042 } |
| 5043 } |
| 5044 ZoneList<CharacterRange>* negated_ranges = |
| 5045 new ZoneList<CharacterRange>(new_length); |
| 5046 CharacterRange::Negate(ranges, negated_ranges); |
| 5047 set_first_character_set(negated_ranges); |
| 5048 } else { |
| 5049 set_first_character_set(char_class->ranges()); |
| 5050 } |
| 5051 } |
| 5052 } |
| 5053 return budget; |
| 5054 } |
| 5055 |
4414 | 5056 |
4415 | 5057 |
4416 // ------------------------------------------------------------------- | 5058 // ------------------------------------------------------------------- |
4417 // Dispatch table construction | 5059 // Dispatch table construction |
4418 | 5060 |
4419 | 5061 |
4420 void DispatchTableConstructor::VisitEnd(EndNode* that) { | 5062 void DispatchTableConstructor::VisitEnd(EndNode* that) { |
4421 AddRange(CharacterRange::Everything()); | 5063 AddRange(CharacterRange::Everything()); |
4422 } | 5064 } |
4423 | 5065 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4464 AddRange(CharacterRange::Everything()); | 5106 AddRange(CharacterRange::Everything()); |
4465 } | 5107 } |
4466 | 5108 |
4467 | 5109 |
4468 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) { | 5110 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) { |
4469 RegExpNode* target = that->on_success(); | 5111 RegExpNode* target = that->on_success(); |
4470 target->Accept(this); | 5112 target->Accept(this); |
4471 } | 5113 } |
4472 | 5114 |
4473 | 5115 |
4474 | |
4475 static int CompareRangeByFrom(const CharacterRange* a, | 5116 static int CompareRangeByFrom(const CharacterRange* a, |
4476 const CharacterRange* b) { | 5117 const CharacterRange* b) { |
4477 return Compare<uc16>(a->from(), b->from()); | 5118 return Compare<uc16>(a->from(), b->from()); |
4478 } | 5119 } |
4479 | 5120 |
4480 | 5121 |
4481 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { | 5122 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) { |
4482 ranges->Sort(CompareRangeByFrom); | 5123 ranges->Sort(CompareRangeByFrom); |
4483 uc16 last = 0; | 5124 uc16 last = 0; |
4484 for (int i = 0; i < ranges->length(); i++) { | 5125 for (int i = 0; i < ranges->length(); i++) { |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4600 RegExpMacroAssemblerIrregexp macro_assembler(codes); | 5241 RegExpMacroAssemblerIrregexp macro_assembler(codes); |
4601 #endif | 5242 #endif |
4602 | 5243 |
4603 return compiler.Assemble(¯o_assembler, | 5244 return compiler.Assemble(¯o_assembler, |
4604 node, | 5245 node, |
4605 data->capture_count, | 5246 data->capture_count, |
4606 pattern); | 5247 pattern); |
4607 } | 5248 } |
4608 | 5249 |
4609 }} // namespace v8::internal | 5250 }} // namespace v8::internal |
OLD | NEW |