Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
| 6 | 6 |
| 7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
| 8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
| 9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
| 10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
| (...skipping 1570 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1581 macro_assembler->IfRegisterLT(guard->reg(), | 1581 macro_assembler->IfRegisterLT(guard->reg(), |
| 1582 guard->value(), | 1582 guard->value(), |
| 1583 trace->backtrack()); | 1583 trace->backtrack()); |
| 1584 break; | 1584 break; |
| 1585 } | 1585 } |
| 1586 } | 1586 } |
| 1587 | 1587 |
| 1588 | 1588 |
| 1589 // Returns the number of characters in the equivalence class, omitting those | 1589 // Returns the number of characters in the equivalence class, omitting those |
| 1590 // that cannot occur in the source string because it is Latin1. | 1590 // that cannot occur in the source string because it is Latin1. |
| 1591 static int GetCaseIndependentLetters(Isolate* isolate, uc16 character, | 1591 static int GetCaseIndependentLetters(RegExpCompiler* compiler, uc16 character, |
| 1592 bool one_byte_subject, | 1592 bool one_byte_subject, uc32* letters) { |
|
erikcorry
2016/02/01 15:21:40
The compiler knows whether we have a one-byte subj
Yang
2016/02/02 06:06:48
Done.
| |
| 1593 unibrow::uchar* letters) { | 1593 int length; |
| 1594 int length = | 1594 #ifdef V8_I18N_SUPPORT |
| 1595 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); | 1595 if (compiler->unicode()) { |
| 1596 // Unibrow returns 0 or 1 for characters where case independence is | 1596 USet* set = uset_open(character, character); |
| 1597 // trivial. | 1597 uset_closeOver(set, USET_CASE_INSENSITIVE); |
| 1598 if (length == 0) { | 1598 uset_removeAllStrings(set); |
| 1599 letters[0] = character; | 1599 length = uset_size(set); |
| 1600 length = 1; | 1600 for (int i = 0; i < length; i++) { |
| 1601 letters[i] = uset_charAt(set, i); | |
| 1602 } | |
| 1603 uset_close(set); | |
| 1604 } else // NOLINT | |
| 1605 // Fallback in case ICU is not included. | |
| 1606 #endif // V8_I18N_SUPPORT | |
| 1607 { | |
| 1608 length = compiler->isolate()->jsregexp_uncanonicalize()->get(character, | |
| 1609 '\0', letters); | |
| 1610 // Unibrow returns 0 or 1 for characters where case independence is | |
| 1611 // trivial. | |
| 1612 if (length == 0) { | |
| 1613 letters[0] = character; | |
| 1614 length = 1; | |
| 1615 } | |
| 1601 } | 1616 } |
| 1602 | 1617 |
| 1603 if (one_byte_subject) { | 1618 if (one_byte_subject) { |
| 1604 int new_length = 0; | 1619 int new_length = 0; |
| 1605 for (int i = 0; i < length; i++) { | 1620 for (int i = 0; i < length; i++) { |
| 1606 if (letters[i] <= String::kMaxOneByteCharCode) { | 1621 if (letters[i] <= String::kMaxOneByteCharCode) { |
| 1607 letters[new_length++] = letters[i]; | 1622 letters[new_length++] = letters[i]; |
| 1608 } | 1623 } |
| 1609 } | 1624 } |
| 1610 length = new_length; | 1625 length = new_length; |
| 1611 } | 1626 } |
| 1612 | 1627 |
| 1613 return length; | 1628 return length; |
| 1614 } | 1629 } |
| 1615 | 1630 |
| 1616 | 1631 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, uc16 c, |
| 1617 static inline bool EmitSimpleCharacter(Isolate* isolate, | 1632 Label* on_failure, int cp_offset, |
| 1618 RegExpCompiler* compiler, | 1633 bool check, bool preloaded) { |
| 1619 uc16 c, | |
| 1620 Label* on_failure, | |
| 1621 int cp_offset, | |
| 1622 bool check, | |
| 1623 bool preloaded) { | |
| 1624 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 1634 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 1625 bool bound_checked = false; | 1635 bool bound_checked = false; |
| 1626 if (!preloaded) { | 1636 if (!preloaded) { |
| 1627 assembler->LoadCurrentCharacter( | 1637 assembler->LoadCurrentCharacter( |
| 1628 cp_offset, | 1638 cp_offset, |
| 1629 on_failure, | 1639 on_failure, |
| 1630 check); | 1640 check); |
| 1631 bound_checked = true; | 1641 bound_checked = true; |
| 1632 } | 1642 } |
| 1633 assembler->CheckNotCharacter(c, on_failure); | 1643 assembler->CheckNotCharacter(c, on_failure); |
| 1634 return bound_checked; | 1644 return bound_checked; |
| 1635 } | 1645 } |
| 1636 | 1646 |
| 1637 | 1647 |
| 1638 // Only emits non-letters (things that don't have case). Only used for case | 1648 // Only emits non-letters (things that don't have case). Only used for case |
| 1639 // independent matches. | 1649 // independent matches. |
| 1640 static inline bool EmitAtomNonLetter(Isolate* isolate, | 1650 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, uc16 c, |
| 1641 RegExpCompiler* compiler, | 1651 Label* on_failure, int cp_offset, |
| 1642 uc16 c, | 1652 bool check, bool preloaded) { |
| 1643 Label* on_failure, | |
| 1644 int cp_offset, | |
| 1645 bool check, | |
| 1646 bool preloaded) { | |
| 1647 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1653 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1648 bool one_byte = compiler->one_byte(); | 1654 bool one_byte = compiler->one_byte(); |
| 1649 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1655 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1650 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); | 1656 int length = GetCaseIndependentLetters(compiler, c, one_byte, chars); |
| 1651 if (length < 1) { | 1657 if (length < 1) { |
| 1652 // This can't match. Must be an one-byte subject and a non-one-byte | 1658 // This can't match. Must be an one-byte subject and a non-one-byte |
| 1653 // character. We do not need to do anything since the one-byte pass | 1659 // character. We do not need to do anything since the one-byte pass |
| 1654 // already handled this. | 1660 // already handled this. |
| 1655 return false; // Bounds not checked. | 1661 return false; // Bounds not checked. |
| 1656 } | 1662 } |
| 1657 bool checked = false; | 1663 bool checked = false; |
| 1658 // We handle the length > 1 case in a later pass. | 1664 // We handle the length > 1 case in a later pass. |
| 1659 if (length == 1) { | 1665 if (length == 1) { |
| 1660 if (one_byte && c > String::kMaxOneByteCharCodeU) { | |
|
erikcorry
2016/02/01 15:21:40
I think this is still worth having. For these cha
Yang
2016/02/02 06:06:48
I'm confused here. Wouldn't the FilterOneByte pass
| |
| 1661 // Can't match - see above. | |
| 1662 return false; // Bounds not checked. | |
| 1663 } | |
| 1664 if (!preloaded) { | 1666 if (!preloaded) { |
| 1665 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1667 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 1666 checked = check; | 1668 checked = check; |
| 1667 } | 1669 } |
| 1668 macro_assembler->CheckNotCharacter(c, on_failure); | 1670 macro_assembler->CheckNotCharacter(c, on_failure); |
| 1669 } | 1671 } |
| 1670 return checked; | 1672 return checked; |
| 1671 } | 1673 } |
| 1672 | 1674 |
| 1673 | 1675 |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 1700 uc16 mask = char_mask ^ diff; | 1702 uc16 mask = char_mask ^ diff; |
| 1701 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 1703 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, |
| 1702 diff, | 1704 diff, |
| 1703 mask, | 1705 mask, |
| 1704 on_failure); | 1706 on_failure); |
| 1705 return true; | 1707 return true; |
| 1706 } | 1708 } |
| 1707 return false; | 1709 return false; |
| 1708 } | 1710 } |
| 1709 | 1711 |
| 1710 | 1712 typedef bool EmitCharacterFunction(RegExpCompiler* compiler, uc16 c, |
| 1711 typedef bool EmitCharacterFunction(Isolate* isolate, | 1713 Label* on_failure, int cp_offset, bool check, |
| 1712 RegExpCompiler* compiler, | |
| 1713 uc16 c, | |
| 1714 Label* on_failure, | |
| 1715 int cp_offset, | |
| 1716 bool check, | |
| 1717 bool preloaded); | 1714 bool preloaded); |
| 1718 | 1715 |
| 1719 // Only emits letters (things that have case). Only used for case independent | 1716 // Only emits letters (things that have case). Only used for case independent |
| 1720 // matches. | 1717 // matches. |
| 1721 static inline bool EmitAtomLetter(Isolate* isolate, | 1718 static inline bool EmitAtomLetter(RegExpCompiler* compiler, uc16 c, |
| 1722 RegExpCompiler* compiler, | 1719 Label* on_failure, int cp_offset, bool check, |
| 1723 uc16 c, | |
| 1724 Label* on_failure, | |
| 1725 int cp_offset, | |
| 1726 bool check, | |
| 1727 bool preloaded) { | 1720 bool preloaded) { |
| 1728 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1721 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1729 bool one_byte = compiler->one_byte(); | 1722 bool one_byte = compiler->one_byte(); |
| 1730 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1723 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1731 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); | 1724 int length = GetCaseIndependentLetters(compiler, c, one_byte, chars); |
| 1732 if (length <= 1) return false; | 1725 if (length <= 1) return false; |
| 1733 // We may not need to check against the end of the input string | 1726 // We may not need to check against the end of the input string |
| 1734 // if this character lies before a character that matched. | 1727 // if this character lies before a character that matched. |
| 1735 if (!preloaded) { | 1728 if (!preloaded) { |
| 1736 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1729 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 1737 } | 1730 } |
| 1738 Label ok; | 1731 Label ok; |
| 1739 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1732 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
| 1740 switch (length) { | 1733 switch (length) { |
| 1741 case 2: { | 1734 case 2: { |
| (...skipping 528 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2270 int ActionNode::EatsAtLeast(int still_to_find, | 2263 int ActionNode::EatsAtLeast(int still_to_find, |
| 2271 int budget, | 2264 int budget, |
| 2272 bool not_at_start) { | 2265 bool not_at_start) { |
| 2273 if (budget <= 0) return 0; | 2266 if (budget <= 0) return 0; |
| 2274 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 2267 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! |
| 2275 return on_success()->EatsAtLeast(still_to_find, | 2268 return on_success()->EatsAtLeast(still_to_find, |
| 2276 budget - 1, | 2269 budget - 1, |
| 2277 not_at_start); | 2270 not_at_start); |
| 2278 } | 2271 } |
| 2279 | 2272 |
| 2280 | 2273 void ActionNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget, |
| 2281 void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, | |
| 2282 BoyerMooreLookahead* bm, bool not_at_start) { | 2274 BoyerMooreLookahead* bm, bool not_at_start) { |
| 2283 if (action_type_ == BEGIN_SUBMATCH) { | 2275 if (action_type_ == BEGIN_SUBMATCH) { |
| 2284 bm->SetRest(offset); | 2276 bm->SetRest(offset); |
| 2285 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { | 2277 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
| 2286 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); | 2278 on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); |
| 2287 } | 2279 } |
| 2288 SaveBMInfo(bm, not_at_start, offset); | 2280 SaveBMInfo(bm, not_at_start, offset); |
| 2289 } | 2281 } |
| 2290 | 2282 |
| 2291 | 2283 |
| 2292 int AssertionNode::EatsAtLeast(int still_to_find, | 2284 int AssertionNode::EatsAtLeast(int still_to_find, |
| 2293 int budget, | 2285 int budget, |
| 2294 bool not_at_start) { | 2286 bool not_at_start) { |
| 2295 if (budget <= 0) return 0; | 2287 if (budget <= 0) return 0; |
| 2296 // If we know we are not at the start and we are asked "how many characters | 2288 // If we know we are not at the start and we are asked "how many characters |
| 2297 // will you match if you succeed?" then we can answer anything since false | 2289 // will you match if you succeed?" then we can answer anything since false |
| 2298 // implies false. So lets just return the max answer (still_to_find) since | 2290 // implies false. So lets just return the max answer (still_to_find) since |
| 2299 // that won't prevent us from preloading a lot of characters for the other | 2291 // that won't prevent us from preloading a lot of characters for the other |
| 2300 // branches in the node graph. | 2292 // branches in the node graph. |
| 2301 if (assertion_type() == AT_START && not_at_start) return still_to_find; | 2293 if (assertion_type() == AT_START && not_at_start) return still_to_find; |
| 2302 return on_success()->EatsAtLeast(still_to_find, | 2294 return on_success()->EatsAtLeast(still_to_find, |
| 2303 budget - 1, | 2295 budget - 1, |
| 2304 not_at_start); | 2296 not_at_start); |
| 2305 } | 2297 } |
| 2306 | 2298 |
| 2307 | 2299 void AssertionNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
| 2308 void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, | 2300 int budget, BoyerMooreLookahead* bm, |
| 2309 BoyerMooreLookahead* bm, bool not_at_start) { | 2301 bool not_at_start) { |
| 2310 // Match the behaviour of EatsAtLeast on this node. | 2302 // Match the behaviour of EatsAtLeast on this node. |
| 2311 if (assertion_type() == AT_START && not_at_start) return; | 2303 if (assertion_type() == AT_START && not_at_start) return; |
| 2312 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); | 2304 on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); |
| 2313 SaveBMInfo(bm, not_at_start, offset); | 2305 SaveBMInfo(bm, not_at_start, offset); |
| 2314 } | 2306 } |
| 2315 | 2307 |
| 2316 | 2308 |
| 2317 int BackReferenceNode::EatsAtLeast(int still_to_find, | 2309 int BackReferenceNode::EatsAtLeast(int still_to_find, |
| 2318 int budget, | 2310 int budget, |
| 2319 bool not_at_start) { | 2311 bool not_at_start) { |
| 2320 if (read_backward()) return 0; | 2312 if (read_backward()) return 0; |
| 2321 if (budget <= 0) return 0; | 2313 if (budget <= 0) return 0; |
| 2322 return on_success()->EatsAtLeast(still_to_find, | 2314 return on_success()->EatsAtLeast(still_to_find, |
| (...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2516 // The masks and values for the positions will be combined into a single | 2508 // The masks and values for the positions will be combined into a single |
| 2517 // machine word for the current character width in order to be used in | 2509 // machine word for the current character width in order to be used in |
| 2518 // generating a quick check. | 2510 // generating a quick check. |
| 2519 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2511 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2520 RegExpCompiler* compiler, | 2512 RegExpCompiler* compiler, |
| 2521 int characters_filled_in, | 2513 int characters_filled_in, |
| 2522 bool not_at_start) { | 2514 bool not_at_start) { |
| 2523 // Do not collect any quick check details if the text node reads backward, | 2515 // Do not collect any quick check details if the text node reads backward, |
| 2524 // since it reads in the opposite direction than we use for quick checks. | 2516 // since it reads in the opposite direction than we use for quick checks. |
| 2525 if (read_backward()) return; | 2517 if (read_backward()) return; |
| 2526 Isolate* isolate = compiler->macro_assembler()->isolate(); | |
| 2527 DCHECK(characters_filled_in < details->characters()); | 2518 DCHECK(characters_filled_in < details->characters()); |
| 2528 int characters = details->characters(); | 2519 int characters = details->characters(); |
| 2529 int char_mask; | 2520 int char_mask; |
| 2530 if (compiler->one_byte()) { | 2521 if (compiler->one_byte()) { |
| 2531 char_mask = String::kMaxOneByteCharCode; | 2522 char_mask = String::kMaxOneByteCharCode; |
| 2532 } else { | 2523 } else { |
| 2533 char_mask = String::kMaxUtf16CodeUnit; | 2524 char_mask = String::kMaxUtf16CodeUnit; |
| 2534 } | 2525 } |
| 2535 for (int k = 0; k < elements()->length(); k++) { | 2526 for (int k = 0; k < elements()->length(); k++) { |
| 2536 TextElement elm = elements()->at(k); | 2527 TextElement elm = elements()->at(k); |
| 2537 if (elm.text_type() == TextElement::ATOM) { | 2528 if (elm.text_type() == TextElement::ATOM) { |
| 2538 Vector<const uc16> quarks = elm.atom()->data(); | 2529 Vector<const uc16> quarks = elm.atom()->data(); |
| 2539 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2530 for (int i = 0; i < characters && i < quarks.length(); i++) { |
| 2540 QuickCheckDetails::Position* pos = | 2531 QuickCheckDetails::Position* pos = |
| 2541 details->positions(characters_filled_in); | 2532 details->positions(characters_filled_in); |
| 2542 uc16 c = quarks[i]; | 2533 uc16 c = quarks[i]; |
| 2543 if (compiler->ignore_case()) { | 2534 if (compiler->ignore_case()) { |
| 2544 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 2535 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 2545 int length = GetCaseIndependentLetters(isolate, c, | 2536 int length = GetCaseIndependentLetters(compiler, c, |
| 2546 compiler->one_byte(), chars); | 2537 compiler->one_byte(), chars); |
| 2547 if (length == 0) { | 2538 if (length == 0) { |
| 2548 // This can happen because all case variants are non-Latin1, but we | 2539 // This can happen because all case variants are non-Latin1, but we |
| 2549 // know the input is Latin1. | 2540 // know the input is Latin1. |
| 2550 details->set_cannot_match(); | 2541 details->set_cannot_match(); |
| 2551 pos->determines_perfectly = false; | 2542 pos->determines_perfectly = false; |
| 2552 return; | 2543 return; |
| 2553 } | 2544 } |
| 2554 if (length == 1) { | 2545 if (length == 1) { |
| 2555 // This letter has no case equivalents, so it's nice and simple | 2546 // This letter has no case equivalents, so it's nice and simple |
| (...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2741 DCHECK(!info->visited); | 2732 DCHECK(!info->visited); |
| 2742 info->visited = true; | 2733 info->visited = true; |
| 2743 } | 2734 } |
| 2744 ~VisitMarker() { | 2735 ~VisitMarker() { |
| 2745 info_->visited = false; | 2736 info_->visited = false; |
| 2746 } | 2737 } |
| 2747 private: | 2738 private: |
| 2748 NodeInfo* info_; | 2739 NodeInfo* info_; |
| 2749 }; | 2740 }; |
| 2750 | 2741 |
| 2751 | 2742 RegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
| 2752 RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) { | |
| 2753 if (info()->replacement_calculated) return replacement(); | 2743 if (info()->replacement_calculated) return replacement(); |
| 2754 if (depth < 0) return this; | 2744 if (depth < 0) return this; |
| 2755 DCHECK(!info()->visited); | 2745 DCHECK(!info()->visited); |
| 2756 VisitMarker marker(info()); | 2746 VisitMarker marker(info()); |
| 2757 return FilterSuccessor(depth - 1, ignore_case); | 2747 return FilterSuccessor(depth - 1, compiler); |
| 2758 } | 2748 } |
| 2759 | 2749 |
| 2760 | 2750 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, |
| 2761 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { | 2751 RegExpCompiler* compiler) { |
| 2762 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); | 2752 RegExpNode* next = on_success_->FilterOneByte(depth - 1, compiler); |
| 2763 if (next == NULL) return set_replacement(NULL); | 2753 if (next == NULL) return set_replacement(NULL); |
| 2764 on_success_ = next; | 2754 on_success_ = next; |
| 2765 return set_replacement(this); | 2755 return set_replacement(this); |
| 2766 } | 2756 } |
| 2767 | 2757 |
| 2768 | 2758 |
| 2769 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 2759 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
| 2770 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 2760 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
| 2771 // TODO(dcarney): this could be a lot more efficient. | 2761 // TODO(dcarney): this could be a lot more efficient. |
| 2772 return range.Contains(0x39c) || | 2762 return range.Contains(0x39c) || |
| 2773 range.Contains(0x3bc) || range.Contains(0x178); | 2763 range.Contains(0x3bc) || range.Contains(0x178); |
| 2774 } | 2764 } |
| 2775 | 2765 |
| 2776 | 2766 |
| 2777 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { | 2767 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { |
| 2778 for (int i = 0; i < ranges->length(); i++) { | 2768 for (int i = 0; i < ranges->length(); i++) { |
| 2779 // TODO(dcarney): this could be a lot more efficient. | 2769 // TODO(dcarney): this could be a lot more efficient. |
| 2780 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; | 2770 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; |
| 2781 } | 2771 } |
| 2782 return false; | 2772 return false; |
| 2783 } | 2773 } |
| 2784 | 2774 |
| 2775 static uc16 ConvertNonLatin1ToEquivalentLatin1(bool unicode, uc16 c) { | |
| 2776 #ifdef V8_I18N_SUPPORT | |
| 2777 if (unicode) { | |
| 2778 USet* set = uset_open(c, c); | |
| 2779 uset_closeOver(set, USET_CASE_INSENSITIVE); | |
| 2780 uset_removeAllStrings(set); | |
| 2781 int length = uset_size(set); | |
| 2782 uc16 result = 0; | |
| 2783 for (int i = 0; i < length; i++) { | |
| 2784 uc32 c = uset_charAt(set, i); | |
| 2785 if (c <= String::kMaxOneByteCharCode) { | |
| 2786 result = static_cast<uc16>(c); | |
| 2787 break; | |
| 2788 } | |
| 2789 } | |
| 2790 uset_close(set); | |
| 2791 return result; | |
| 2792 } | |
| 2793 // Fallback to unibrow if ICU is not included. | |
| 2794 #endif // V8_I18N_SUPPORT | |
| 2795 return unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | |
| 2796 } | |
| 2785 | 2797 |
| 2786 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { | 2798 RegExpNode* TextNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
| 2787 if (info()->replacement_calculated) return replacement(); | 2799 if (info()->replacement_calculated) return replacement(); |
| 2788 if (depth < 0) return this; | 2800 if (depth < 0) return this; |
| 2789 DCHECK(!info()->visited); | 2801 DCHECK(!info()->visited); |
| 2790 VisitMarker marker(info()); | 2802 VisitMarker marker(info()); |
| 2791 int element_count = elements()->length(); | 2803 int element_count = elements()->length(); |
| 2792 for (int i = 0; i < element_count; i++) { | 2804 for (int i = 0; i < element_count; i++) { |
| 2793 TextElement elm = elements()->at(i); | 2805 TextElement elm = elements()->at(i); |
| 2794 if (elm.text_type() == TextElement::ATOM) { | 2806 if (elm.text_type() == TextElement::ATOM) { |
| 2795 Vector<const uc16> quarks = elm.atom()->data(); | 2807 Vector<const uc16> quarks = elm.atom()->data(); |
| 2796 for (int j = 0; j < quarks.length(); j++) { | 2808 for (int j = 0; j < quarks.length(); j++) { |
| 2797 uint16_t c = quarks[j]; | 2809 uc16 c = quarks[j]; |
| 2798 if (c <= String::kMaxOneByteCharCode) continue; | 2810 if (c <= String::kMaxOneByteCharCode) continue; |
| 2799 if (!ignore_case) return set_replacement(NULL); | 2811 if (!compiler->ignore_case()) return set_replacement(NULL); |
| 2800 // Here, we need to check for characters whose upper and lower cases | 2812 // Here, we need to check for characters whose upper and lower cases |
| 2801 // are outside the Latin-1 range. | 2813 // are outside the Latin-1 range. |
| 2802 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | 2814 uc16 converted = |
| 2815 ConvertNonLatin1ToEquivalentLatin1(compiler->unicode(), c); | |
| 2803 // Character is outside Latin-1 completely | 2816 // Character is outside Latin-1 completely |
| 2804 if (converted == 0) return set_replacement(NULL); | 2817 if (converted == 0) return set_replacement(NULL); |
| 2805 // Convert quark to Latin-1 in place. | 2818 // Convert quark to Latin-1 in place. |
| 2806 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); | 2819 uc16* copy = const_cast<uc16*>(quarks.start()); |
| 2807 copy[j] = converted; | 2820 copy[j] = converted; |
| 2808 } | 2821 } |
| 2809 } else { | 2822 } else { |
| 2810 DCHECK(elm.text_type() == TextElement::CHAR_CLASS); | 2823 DCHECK(elm.text_type() == TextElement::CHAR_CLASS); |
| 2811 RegExpCharacterClass* cc = elm.char_class(); | 2824 RegExpCharacterClass* cc = elm.char_class(); |
| 2812 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2825 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
| 2813 CharacterRange::Canonicalize(ranges); | 2826 CharacterRange::Canonicalize(ranges); |
| 2814 // Now they are in order so we only need to look at the first. | 2827 // Now they are in order so we only need to look at the first. |
| 2815 int range_count = ranges->length(); | 2828 int range_count = ranges->length(); |
| 2816 if (cc->is_negated()) { | 2829 if (cc->is_negated()) { |
| 2817 if (range_count != 0 && | 2830 if (range_count != 0 && |
| 2818 ranges->at(0).from() == 0 && | 2831 ranges->at(0).from() == 0 && |
| 2819 ranges->at(0).to() >= String::kMaxOneByteCharCode) { | 2832 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
| 2820 // This will be handled in a later filter. | 2833 // This will be handled in a later filter. |
| 2821 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2834 if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges)) |
| 2835 continue; | |
| 2822 return set_replacement(NULL); | 2836 return set_replacement(NULL); |
| 2823 } | 2837 } |
| 2824 } else { | 2838 } else { |
| 2825 if (range_count == 0 || | 2839 if (range_count == 0 || |
| 2826 ranges->at(0).from() > String::kMaxOneByteCharCode) { | 2840 ranges->at(0).from() > String::kMaxOneByteCharCode) { |
| 2827 // This will be handled in a later filter. | 2841 // This will be handled in a later filter. |
| 2828 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; | 2842 if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges)) |
| 2843 continue; | |
| 2829 return set_replacement(NULL); | 2844 return set_replacement(NULL); |
| 2830 } | 2845 } |
| 2831 } | 2846 } |
| 2832 } | 2847 } |
| 2833 } | 2848 } |
| 2834 return FilterSuccessor(depth - 1, ignore_case); | 2849 return FilterSuccessor(depth - 1, compiler); |
| 2835 } | 2850 } |
| 2836 | 2851 |
| 2837 | 2852 RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
| 2838 RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) { | |
| 2839 if (info()->replacement_calculated) return replacement(); | 2853 if (info()->replacement_calculated) return replacement(); |
| 2840 if (depth < 0) return this; | 2854 if (depth < 0) return this; |
| 2841 if (info()->visited) return this; | 2855 if (info()->visited) return this; |
| 2842 { | 2856 { |
| 2843 VisitMarker marker(info()); | 2857 VisitMarker marker(info()); |
| 2844 | 2858 |
| 2845 RegExpNode* continue_replacement = | 2859 RegExpNode* continue_replacement = |
| 2846 continue_node_->FilterOneByte(depth - 1, ignore_case); | 2860 continue_node_->FilterOneByte(depth - 1, compiler); |
| 2847 // If we can't continue after the loop then there is no sense in doing the | 2861 // If we can't continue after the loop then there is no sense in doing the |
| 2848 // loop. | 2862 // loop. |
| 2849 if (continue_replacement == NULL) return set_replacement(NULL); | 2863 if (continue_replacement == NULL) return set_replacement(NULL); |
| 2850 } | 2864 } |
| 2851 | 2865 |
| 2852 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); | 2866 return ChoiceNode::FilterOneByte(depth - 1, compiler); |
| 2853 } | 2867 } |
| 2854 | 2868 |
| 2855 | 2869 RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
| 2856 RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { | |
| 2857 if (info()->replacement_calculated) return replacement(); | 2870 if (info()->replacement_calculated) return replacement(); |
| 2858 if (depth < 0) return this; | 2871 if (depth < 0) return this; |
| 2859 if (info()->visited) return this; | 2872 if (info()->visited) return this; |
| 2860 VisitMarker marker(info()); | 2873 VisitMarker marker(info()); |
| 2861 int choice_count = alternatives_->length(); | 2874 int choice_count = alternatives_->length(); |
| 2862 | 2875 |
| 2863 for (int i = 0; i < choice_count; i++) { | 2876 for (int i = 0; i < choice_count; i++) { |
| 2864 GuardedAlternative alternative = alternatives_->at(i); | 2877 GuardedAlternative alternative = alternatives_->at(i); |
| 2865 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { | 2878 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { |
| 2866 set_replacement(this); | 2879 set_replacement(this); |
| 2867 return this; | 2880 return this; |
| 2868 } | 2881 } |
| 2869 } | 2882 } |
| 2870 | 2883 |
| 2871 int surviving = 0; | 2884 int surviving = 0; |
| 2872 RegExpNode* survivor = NULL; | 2885 RegExpNode* survivor = NULL; |
| 2873 for (int i = 0; i < choice_count; i++) { | 2886 for (int i = 0; i < choice_count; i++) { |
| 2874 GuardedAlternative alternative = alternatives_->at(i); | 2887 GuardedAlternative alternative = alternatives_->at(i); |
| 2875 RegExpNode* replacement = | 2888 RegExpNode* replacement = |
| 2876 alternative.node()->FilterOneByte(depth - 1, ignore_case); | 2889 alternative.node()->FilterOneByte(depth - 1, compiler); |
| 2877 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. | 2890 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. |
| 2878 if (replacement != NULL) { | 2891 if (replacement != NULL) { |
| 2879 alternatives_->at(i).set_node(replacement); | 2892 alternatives_->at(i).set_node(replacement); |
| 2880 surviving++; | 2893 surviving++; |
| 2881 survivor = replacement; | 2894 survivor = replacement; |
| 2882 } | 2895 } |
| 2883 } | 2896 } |
| 2884 if (surviving < 2) return set_replacement(survivor); | 2897 if (surviving < 2) return set_replacement(survivor); |
| 2885 | 2898 |
| 2886 set_replacement(this); | 2899 set_replacement(this); |
| 2887 if (surviving == choice_count) { | 2900 if (surviving == choice_count) { |
| 2888 return this; | 2901 return this; |
| 2889 } | 2902 } |
| 2890 // Only some of the nodes survived the filtering. We need to rebuild the | 2903 // Only some of the nodes survived the filtering. We need to rebuild the |
| 2891 // alternatives list. | 2904 // alternatives list. |
| 2892 ZoneList<GuardedAlternative>* new_alternatives = | 2905 ZoneList<GuardedAlternative>* new_alternatives = |
| 2893 new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); | 2906 new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); |
| 2894 for (int i = 0; i < choice_count; i++) { | 2907 for (int i = 0; i < choice_count; i++) { |
| 2895 RegExpNode* replacement = | 2908 RegExpNode* replacement = |
| 2896 alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case); | 2909 alternatives_->at(i).node()->FilterOneByte(depth - 1, compiler); |
| 2897 if (replacement != NULL) { | 2910 if (replacement != NULL) { |
| 2898 alternatives_->at(i).set_node(replacement); | 2911 alternatives_->at(i).set_node(replacement); |
| 2899 new_alternatives->Add(alternatives_->at(i), zone()); | 2912 new_alternatives->Add(alternatives_->at(i), zone()); |
| 2900 } | 2913 } |
| 2901 } | 2914 } |
| 2902 alternatives_ = new_alternatives; | 2915 alternatives_ = new_alternatives; |
| 2903 return this; | 2916 return this; |
| 2904 } | 2917 } |
| 2905 | 2918 |
| 2906 | 2919 RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte( |
| 2907 RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth, | 2920 int depth, RegExpCompiler* compiler) { |
| 2908 bool ignore_case) { | |
| 2909 if (info()->replacement_calculated) return replacement(); | 2921 if (info()->replacement_calculated) return replacement(); |
| 2910 if (depth < 0) return this; | 2922 if (depth < 0) return this; |
| 2911 if (info()->visited) return this; | 2923 if (info()->visited) return this; |
| 2912 VisitMarker marker(info()); | 2924 VisitMarker marker(info()); |
| 2913 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 2925 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
| 2914 // afterwards. | 2926 // afterwards. |
| 2915 RegExpNode* node = alternatives_->at(1).node(); | 2927 RegExpNode* node = alternatives_->at(1).node(); |
| 2916 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); | 2928 RegExpNode* replacement = node->FilterOneByte(depth - 1, compiler); |
| 2917 if (replacement == NULL) return set_replacement(NULL); | 2929 if (replacement == NULL) return set_replacement(NULL); |
| 2918 alternatives_->at(1).set_node(replacement); | 2930 alternatives_->at(1).set_node(replacement); |
| 2919 | 2931 |
| 2920 RegExpNode* neg_node = alternatives_->at(0).node(); | 2932 RegExpNode* neg_node = alternatives_->at(0).node(); |
| 2921 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); | 2933 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, compiler); |
| 2922 // If the negative lookahead is always going to fail then | 2934 // If the negative lookahead is always going to fail then |
| 2923 // we don't need to check it. | 2935 // we don't need to check it. |
| 2924 if (neg_replacement == NULL) return set_replacement(replacement); | 2936 if (neg_replacement == NULL) return set_replacement(replacement); |
| 2925 alternatives_->at(0).set_node(neg_replacement); | 2937 alternatives_->at(0).set_node(neg_replacement); |
| 2926 return set_replacement(this); | 2938 return set_replacement(this); |
| 2927 } | 2939 } |
| 2928 | 2940 |
| 2929 | 2941 |
| 2930 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2942 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2931 RegExpCompiler* compiler, | 2943 RegExpCompiler* compiler, |
| 2932 int characters_filled_in, | 2944 int characters_filled_in, |
| 2933 bool not_at_start) { | 2945 bool not_at_start) { |
| 2934 if (body_can_be_zero_length_ || info()->visited) return; | 2946 if (body_can_be_zero_length_ || info()->visited) return; |
| 2935 VisitMarker marker(info()); | 2947 VisitMarker marker(info()); |
| 2936 return ChoiceNode::GetQuickCheckDetails(details, | 2948 return ChoiceNode::GetQuickCheckDetails(details, |
| 2937 compiler, | 2949 compiler, |
| 2938 characters_filled_in, | 2950 characters_filled_in, |
| 2939 not_at_start); | 2951 not_at_start); |
| 2940 } | 2952 } |
| 2941 | 2953 |
| 2942 | 2954 void LoopChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
| 2943 void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, | 2955 int budget, BoyerMooreLookahead* bm, |
| 2944 BoyerMooreLookahead* bm, bool not_at_start) { | 2956 bool not_at_start) { |
| 2945 if (body_can_be_zero_length_ || budget <= 0) { | 2957 if (body_can_be_zero_length_ || budget <= 0) { |
| 2946 bm->SetRest(offset); | 2958 bm->SetRest(offset); |
| 2947 SaveBMInfo(bm, not_at_start, offset); | 2959 SaveBMInfo(bm, not_at_start, offset); |
| 2948 return; | 2960 return; |
| 2949 } | 2961 } |
| 2950 ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); | 2962 ChoiceNode::FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); |
| 2951 SaveBMInfo(bm, not_at_start, offset); | 2963 SaveBMInfo(bm, not_at_start, offset); |
| 2952 } | 2964 } |
| 2953 | 2965 |
| 2954 | 2966 |
| 2955 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2967 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2956 RegExpCompiler* compiler, | 2968 RegExpCompiler* compiler, |
| 2957 int characters_filled_in, | 2969 int characters_filled_in, |
| 2958 bool not_at_start) { | 2970 bool not_at_start) { |
| 2959 not_at_start = (not_at_start || not_at_start_); | 2971 not_at_start = (not_at_start || not_at_start_); |
| 2960 int choice_count = alternatives_->length(); | 2972 int choice_count = alternatives_->length(); |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3032 assembler->CheckNotCharacter('\r', new_trace.backtrack()); | 3044 assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
| 3033 } | 3045 } |
| 3034 assembler->Bind(&ok); | 3046 assembler->Bind(&ok); |
| 3035 on_success->Emit(compiler, &new_trace); | 3047 on_success->Emit(compiler, &new_trace); |
| 3036 } | 3048 } |
| 3037 | 3049 |
| 3038 | 3050 |
| 3039 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). | 3051 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
| 3040 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { | 3052 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
| 3041 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3053 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3042 Isolate* isolate = assembler->isolate(); | |
| 3043 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 3054 Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
| 3044 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); | 3055 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
| 3045 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 3056 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
| 3046 if (lookahead == NULL) { | 3057 if (lookahead == NULL) { |
| 3047 int eats_at_least = | 3058 int eats_at_least = |
| 3048 Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore, | 3059 Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore, |
| 3049 kRecursionBudget, | 3060 kRecursionBudget, |
| 3050 not_at_start)); | 3061 not_at_start)); |
| 3051 if (eats_at_least >= 1) { | 3062 if (eats_at_least >= 1) { |
| 3052 BoyerMooreLookahead* bm = | 3063 BoyerMooreLookahead* bm = |
| 3053 new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); | 3064 new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); |
| 3054 FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start); | 3065 FillInBMInfo(compiler, 0, kRecursionBudget, bm, not_at_start); |
| 3055 if (bm->at(0)->is_non_word()) | 3066 if (bm->at(0)->is_non_word()) |
| 3056 next_is_word_character = Trace::FALSE_VALUE; | 3067 next_is_word_character = Trace::FALSE_VALUE; |
| 3057 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; | 3068 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
| 3058 } | 3069 } |
| 3059 } else { | 3070 } else { |
| 3060 if (lookahead->at(0)->is_non_word()) | 3071 if (lookahead->at(0)->is_non_word()) |
| 3061 next_is_word_character = Trace::FALSE_VALUE; | 3072 next_is_word_character = Trace::FALSE_VALUE; |
| 3062 if (lookahead->at(0)->is_word()) | 3073 if (lookahead->at(0)->is_word()) |
| 3063 next_is_word_character = Trace::TRUE_VALUE; | 3074 next_is_word_character = Trace::TRUE_VALUE; |
| 3064 } | 3075 } |
| (...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3216 // up to the limit the quick check already checked. In addition the quick | 3227 // up to the limit the quick check already checked. In addition the quick |
| 3217 // check can have involved a mask and compare operation which may simplify | 3228 // check can have involved a mask and compare operation which may simplify |
| 3218 // or obviate the need for further checks at some character positions. | 3229 // or obviate the need for further checks at some character positions. |
| 3219 void TextNode::TextEmitPass(RegExpCompiler* compiler, | 3230 void TextNode::TextEmitPass(RegExpCompiler* compiler, |
| 3220 TextEmitPassType pass, | 3231 TextEmitPassType pass, |
| 3221 bool preloaded, | 3232 bool preloaded, |
| 3222 Trace* trace, | 3233 Trace* trace, |
| 3223 bool first_element_checked, | 3234 bool first_element_checked, |
| 3224 int* checked_up_to) { | 3235 int* checked_up_to) { |
| 3225 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3236 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3226 Isolate* isolate = assembler->isolate(); | |
| 3227 bool one_byte = compiler->one_byte(); | 3237 bool one_byte = compiler->one_byte(); |
| 3228 Label* backtrack = trace->backtrack(); | 3238 Label* backtrack = trace->backtrack(); |
| 3229 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 3239 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
| 3230 int element_count = elements()->length(); | 3240 int element_count = elements()->length(); |
| 3231 int backward_offset = read_backward() ? -Length() : 0; | 3241 int backward_offset = read_backward() ? -Length() : 0; |
| 3232 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 3242 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
| 3233 TextElement elm = elements()->at(i); | 3243 TextElement elm = elements()->at(i); |
| 3234 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; | 3244 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; |
| 3235 if (elm.text_type() == TextElement::ATOM) { | 3245 if (elm.text_type() == TextElement::ATOM) { |
| 3236 Vector<const uc16> quarks = elm.atom()->data(); | 3246 Vector<const uc16> quarks = elm.atom()->data(); |
| 3237 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3247 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
| 3238 if (first_element_checked && i == 0 && j == 0) continue; | 3248 if (first_element_checked && i == 0 && j == 0) continue; |
| 3239 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; | 3249 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; |
| 3240 EmitCharacterFunction* emit_function = NULL; | 3250 EmitCharacterFunction* emit_function = NULL; |
| 3241 switch (pass) { | 3251 switch (pass) { |
| 3242 case NON_LATIN1_MATCH: | 3252 case NON_LATIN1_MATCH: |
| 3243 DCHECK(one_byte); | 3253 DCHECK(one_byte); |
| 3244 if (quarks[j] > String::kMaxOneByteCharCode) { | 3254 if (quarks[j] > String::kMaxOneByteCharCode && |
| 3255 !(compiler->unicode() && compiler->ignore_case())) { | |
| 3256 // In the non-unicode case, if the pattern is non-Latin1, it | |
|
erikcorry
2016/02/01 15:21:40
I think you mean in the one-byte case (non-unicode
Yang
2016/02/02 06:06:48
I did mean this. With /i, if a character is outsid
| |
| 3257 // cannot possibly match a Latin1 character, unless we have the | |
| 3258 // /ui flags, e.g. /\u212b/ui matches "\u00c5". | |
| 3245 assembler->GoTo(backtrack); | 3259 assembler->GoTo(backtrack); |
| 3246 return; | 3260 return; |
| 3247 } | 3261 } |
| 3248 break; | 3262 break; |
| 3249 case NON_LETTER_CHARACTER_MATCH: | 3263 case NON_LETTER_CHARACTER_MATCH: |
| 3250 emit_function = &EmitAtomNonLetter; | 3264 emit_function = &EmitAtomNonLetter; |
| 3251 break; | 3265 break; |
| 3252 case SIMPLE_CHARACTER_MATCH: | 3266 case SIMPLE_CHARACTER_MATCH: |
| 3253 emit_function = &EmitSimpleCharacter; | 3267 emit_function = &EmitSimpleCharacter; |
| 3254 break; | 3268 break; |
| 3255 case CASE_CHARACTER_MATCH: | 3269 case CASE_CHARACTER_MATCH: |
| 3256 emit_function = &EmitAtomLetter; | 3270 emit_function = &EmitAtomLetter; |
| 3257 break; | 3271 break; |
| 3258 default: | 3272 default: |
| 3259 break; | 3273 break; |
| 3260 } | 3274 } |
| 3261 if (emit_function != NULL) { | 3275 if (emit_function != NULL) { |
| 3262 bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); | 3276 bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); |
| 3263 bool bound_checked = | 3277 bool bound_checked = |
| 3264 emit_function(isolate, compiler, quarks[j], backtrack, | 3278 emit_function(compiler, quarks[j], backtrack, cp_offset + j, |
| 3265 cp_offset + j, bounds_check, preloaded); | 3279 bounds_check, preloaded); |
| 3266 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 3280 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
| 3267 } | 3281 } |
| 3268 } | 3282 } |
| 3269 } else { | 3283 } else { |
| 3270 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); | 3284 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); |
| 3271 if (pass == CHARACTER_CLASS_MATCH) { | 3285 if (pass == CHARACTER_CLASS_MATCH) { |
| 3272 if (first_element_checked && i == 0) continue; | 3286 if (first_element_checked && i == 0) continue; |
| 3273 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 3287 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
| 3274 RegExpCharacterClass* cc = elm.char_class(); | 3288 RegExpCharacterClass* cc = elm.char_class(); |
| 3275 bool bounds_check = *checked_up_to < cp_offset || read_backward(); | 3289 bool bounds_check = *checked_up_to < cp_offset || read_backward(); |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3338 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3352 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 3339 LimitResult limit_result = LimitVersions(compiler, trace); | 3353 LimitResult limit_result = LimitVersions(compiler, trace); |
| 3340 if (limit_result == DONE) return; | 3354 if (limit_result == DONE) return; |
| 3341 DCHECK(limit_result == CONTINUE); | 3355 DCHECK(limit_result == CONTINUE); |
| 3342 | 3356 |
| 3343 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { | 3357 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { |
| 3344 compiler->SetRegExpTooBig(); | 3358 compiler->SetRegExpTooBig(); |
| 3345 return; | 3359 return; |
| 3346 } | 3360 } |
| 3347 | 3361 |
| 3348 if (compiler->one_byte()) { | 3362 if (compiler->one_byte()) { |
|
erikcorry
2016/02/01 15:21:40
You should check for the /ui flags here instead of
Yang
2016/02/02 06:06:48
Done.
| |
| 3349 int dummy = 0; | 3363 int dummy = 0; |
| 3350 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); | 3364 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); |
| 3351 } | 3365 } |
| 3352 | 3366 |
| 3353 bool first_elt_done = false; | 3367 bool first_elt_done = false; |
| 3354 int bound_checked_to = trace->cp_offset() - 1; | 3368 int bound_checked_to = trace->cp_offset() - 1; |
| 3355 bound_checked_to += trace->bound_checked_up_to(); | 3369 bound_checked_to += trace->bound_checked_up_to(); |
| 3356 | 3370 |
| 3357 // If a character is preloaded into the current character register then | 3371 // If a character is preloaded into the current character register then |
| 3358 // check that now. | 3372 // check that now. |
| (...skipping 730 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4089 | 4103 |
| 4090 // Really we should be creating a new trace when we execute this function, | 4104 // Really we should be creating a new trace when we execute this function, |
| 4091 // but there is no need, because the code it generates cannot backtrack, and | 4105 // but there is no need, because the code it generates cannot backtrack, and |
| 4092 // we always arrive here with a trivial trace (since it's the entry to a | 4106 // we always arrive here with a trivial trace (since it's the entry to a |
| 4093 // loop. That also implies that there are no preloaded characters, which is | 4107 // loop. That also implies that there are no preloaded characters, which is |
| 4094 // good, because it means we won't be violating any assumptions by | 4108 // good, because it means we won't be violating any assumptions by |
| 4095 // overwriting those characters with new load instructions. | 4109 // overwriting those characters with new load instructions. |
| 4096 DCHECK(trace->is_trivial()); | 4110 DCHECK(trace->is_trivial()); |
| 4097 | 4111 |
| 4098 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 4112 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 4099 Isolate* isolate = macro_assembler->isolate(); | |
| 4100 // At this point we know that we are at a non-greedy loop that will eat | 4113 // At this point we know that we are at a non-greedy loop that will eat |
| 4101 // any character one at a time. Any non-anchored regexp has such a | 4114 // any character one at a time. Any non-anchored regexp has such a |
| 4102 // loop prepended to it in order to find where it starts. We look for | 4115 // loop prepended to it in order to find where it starts. We look for |
| 4103 // a pattern of the form ...abc... where we can look 6 characters ahead | 4116 // a pattern of the form ...abc... where we can look 6 characters ahead |
| 4104 // and step forwards 3 if the character is not one of abc. Abc need | 4117 // and step forwards 3 if the character is not one of abc. Abc need |
| 4105 // not be atoms, they can be any reasonably limited character class or | 4118 // not be atoms, they can be any reasonably limited character class or |
| 4106 // small alternation. | 4119 // small alternation. |
| 4107 BoyerMooreLookahead* bm = bm_info(false); | 4120 BoyerMooreLookahead* bm = bm_info(false); |
| 4108 if (bm == NULL) { | 4121 if (bm == NULL) { |
| 4109 eats_at_least = Min(kMaxLookaheadForBoyerMoore, | 4122 eats_at_least = Min(kMaxLookaheadForBoyerMoore, |
| 4110 EatsAtLeast(kMaxLookaheadForBoyerMoore, | 4123 EatsAtLeast(kMaxLookaheadForBoyerMoore, |
| 4111 kRecursionBudget, | 4124 kRecursionBudget, |
| 4112 false)); | 4125 false)); |
| 4113 if (eats_at_least >= 1) { | 4126 if (eats_at_least >= 1) { |
| 4114 bm = new(zone()) BoyerMooreLookahead(eats_at_least, | 4127 bm = new(zone()) BoyerMooreLookahead(eats_at_least, |
| 4115 compiler, | 4128 compiler, |
| 4116 zone()); | 4129 zone()); |
| 4117 GuardedAlternative alt0 = alternatives_->at(0); | 4130 GuardedAlternative alt0 = alternatives_->at(0); |
| 4118 alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false); | 4131 alt0.node()->FillInBMInfo(compiler, 0, kRecursionBudget, bm, false); |
| 4119 } | 4132 } |
| 4120 } | 4133 } |
| 4121 if (bm != NULL) { | 4134 if (bm != NULL) { |
| 4122 bm->EmitSkipInstructions(macro_assembler); | 4135 bm->EmitSkipInstructions(macro_assembler); |
| 4123 } | 4136 } |
| 4124 return eats_at_least; | 4137 return eats_at_least; |
| 4125 } | 4138 } |
| 4126 | 4139 |
| 4127 | 4140 |
| 4128 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, | 4141 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, |
| (...skipping 2236 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6365 | 6378 |
| 6366 void Analysis::VisitBackReference(BackReferenceNode* that) { | 6379 void Analysis::VisitBackReference(BackReferenceNode* that) { |
| 6367 EnsureAnalyzed(that->on_success()); | 6380 EnsureAnalyzed(that->on_success()); |
| 6368 } | 6381 } |
| 6369 | 6382 |
| 6370 | 6383 |
| 6371 void Analysis::VisitAssertion(AssertionNode* that) { | 6384 void Analysis::VisitAssertion(AssertionNode* that) { |
| 6372 EnsureAnalyzed(that->on_success()); | 6385 EnsureAnalyzed(that->on_success()); |
| 6373 } | 6386 } |
| 6374 | 6387 |
| 6375 | 6388 void BackReferenceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
| 6376 void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, | 6389 int budget, BoyerMooreLookahead* bm, |
| 6377 BoyerMooreLookahead* bm, | |
| 6378 bool not_at_start) { | 6390 bool not_at_start) { |
| 6379 // Working out the set of characters that a backreference can match is too | 6391 // Working out the set of characters that a backreference can match is too |
| 6380 // hard, so we just say that any character can match. | 6392 // hard, so we just say that any character can match. |
| 6381 bm->SetRest(offset); | 6393 bm->SetRest(offset); |
| 6382 SaveBMInfo(bm, not_at_start, offset); | 6394 SaveBMInfo(bm, not_at_start, offset); |
| 6383 } | 6395 } |
| 6384 | 6396 |
| 6385 | 6397 |
| 6386 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == | 6398 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == |
| 6387 RegExpMacroAssembler::kTableSize); | 6399 RegExpMacroAssembler::kTableSize); |
| 6388 | 6400 |
| 6389 | 6401 void ChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget, |
| 6390 void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, | |
| 6391 BoyerMooreLookahead* bm, bool not_at_start) { | 6402 BoyerMooreLookahead* bm, bool not_at_start) { |
| 6392 ZoneList<GuardedAlternative>* alts = alternatives(); | 6403 ZoneList<GuardedAlternative>* alts = alternatives(); |
| 6393 budget = (budget - 1) / alts->length(); | 6404 budget = (budget - 1) / alts->length(); |
| 6394 for (int i = 0; i < alts->length(); i++) { | 6405 for (int i = 0; i < alts->length(); i++) { |
| 6395 GuardedAlternative& alt = alts->at(i); | 6406 GuardedAlternative& alt = alts->at(i); |
| 6396 if (alt.guards() != NULL && alt.guards()->length() != 0) { | 6407 if (alt.guards() != NULL && alt.guards()->length() != 0) { |
| 6397 bm->SetRest(offset); // Give up trying to fill in info. | 6408 bm->SetRest(offset); // Give up trying to fill in info. |
| 6398 SaveBMInfo(bm, not_at_start, offset); | 6409 SaveBMInfo(bm, not_at_start, offset); |
| 6399 return; | 6410 return; |
| 6400 } | 6411 } |
| 6401 alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start); | 6412 alt.node()->FillInBMInfo(compiler, offset, budget, bm, not_at_start); |
| 6402 } | 6413 } |
| 6403 SaveBMInfo(bm, not_at_start, offset); | 6414 SaveBMInfo(bm, not_at_start, offset); |
| 6404 } | 6415 } |
| 6405 | 6416 |
| 6406 | 6417 void TextNode::FillInBMInfo(RegExpCompiler* compiler, int initial_offset, |
| 6407 void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget, | 6418 int budget, BoyerMooreLookahead* bm, |
| 6408 BoyerMooreLookahead* bm, bool not_at_start) { | 6419 bool not_at_start) { |
| 6409 if (initial_offset >= bm->length()) return; | 6420 if (initial_offset >= bm->length()) return; |
| 6410 int offset = initial_offset; | 6421 int offset = initial_offset; |
| 6411 int max_char = bm->max_char(); | 6422 int max_char = bm->max_char(); |
| 6412 for (int i = 0; i < elements()->length(); i++) { | 6423 for (int i = 0; i < elements()->length(); i++) { |
| 6413 if (offset >= bm->length()) { | 6424 if (offset >= bm->length()) { |
| 6414 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 6425 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 6415 return; | 6426 return; |
| 6416 } | 6427 } |
| 6417 TextElement text = elements()->at(i); | 6428 TextElement text = elements()->at(i); |
| 6418 if (text.text_type() == TextElement::ATOM) { | 6429 if (text.text_type() == TextElement::ATOM) { |
| 6419 RegExpAtom* atom = text.atom(); | 6430 RegExpAtom* atom = text.atom(); |
| 6420 for (int j = 0; j < atom->length(); j++, offset++) { | 6431 for (int j = 0; j < atom->length(); j++, offset++) { |
| 6421 if (offset >= bm->length()) { | 6432 if (offset >= bm->length()) { |
| 6422 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 6433 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 6423 return; | 6434 return; |
| 6424 } | 6435 } |
| 6425 uc16 character = atom->data()[j]; | 6436 uc16 character = atom->data()[j]; |
| 6426 if (bm->compiler()->ignore_case()) { | 6437 if (bm->compiler()->ignore_case()) { |
| 6427 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 6438 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 6428 int length = GetCaseIndependentLetters( | 6439 int length = GetCaseIndependentLetters( |
| 6429 isolate, character, bm->max_char() == String::kMaxOneByteCharCode, | 6440 compiler, character, |
| 6430 chars); | 6441 bm->max_char() == String::kMaxOneByteCharCode, chars); |
| 6431 for (int j = 0; j < length; j++) { | 6442 for (int j = 0; j < length; j++) { |
| 6432 bm->Set(offset, chars[j]); | 6443 bm->Set(offset, chars[j]); |
| 6433 } | 6444 } |
| 6434 } else { | 6445 } else { |
| 6435 if (character <= max_char) bm->Set(offset, character); | 6446 if (character <= max_char) bm->Set(offset, character); |
| 6436 } | 6447 } |
| 6437 } | 6448 } |
| 6438 } else { | 6449 } else { |
| 6439 DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type()); | 6450 DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type()); |
| 6440 RegExpCharacterClass* char_class = text.char_class(); | 6451 RegExpCharacterClass* char_class = text.char_class(); |
| 6441 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); | 6452 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); |
| 6442 if (char_class->is_negated()) { | 6453 if (char_class->is_negated()) { |
| 6443 bm->SetAll(offset); | 6454 bm->SetAll(offset); |
| 6444 } else { | 6455 } else { |
| 6445 for (int k = 0; k < ranges->length(); k++) { | 6456 for (int k = 0; k < ranges->length(); k++) { |
| 6446 CharacterRange& range = ranges->at(k); | 6457 CharacterRange& range = ranges->at(k); |
| 6447 if (range.from() > max_char) continue; | 6458 if (range.from() > max_char) continue; |
| 6448 int to = Min(max_char, static_cast<int>(range.to())); | 6459 int to = Min(max_char, static_cast<int>(range.to())); |
| 6449 bm->SetInterval(offset, Interval(range.from(), to)); | 6460 bm->SetInterval(offset, Interval(range.from(), to)); |
| 6450 } | 6461 } |
| 6451 } | 6462 } |
| 6452 offset++; | 6463 offset++; |
| 6453 } | 6464 } |
| 6454 } | 6465 } |
| 6455 if (offset >= bm->length()) { | 6466 if (offset >= bm->length()) { |
| 6456 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 6467 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 6457 return; | 6468 return; |
| 6458 } | 6469 } |
| 6459 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, | 6470 on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, |
| 6460 true); // Not at start after a text node. | 6471 true); // Not at start after a text node. |
| 6461 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 6472 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 6462 } | 6473 } |
| 6463 | 6474 |
| 6464 | 6475 |
| 6465 // ------------------------------------------------------------------- | 6476 // ------------------------------------------------------------------- |
| 6466 // Dispatch table construction | 6477 // Dispatch table construction |
| 6467 | 6478 |
| 6468 | 6479 |
| 6469 void DispatchTableConstructor::VisitEnd(EndNode* that) { | 6480 void DispatchTableConstructor::VisitEnd(EndNode* that) { |
| (...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6607 } | 6618 } |
| 6608 | 6619 |
| 6609 | 6620 |
| 6610 RegExpEngine::CompilationResult RegExpEngine::Compile( | 6621 RegExpEngine::CompilationResult RegExpEngine::Compile( |
| 6611 Isolate* isolate, Zone* zone, RegExpCompileData* data, | 6622 Isolate* isolate, Zone* zone, RegExpCompileData* data, |
| 6612 JSRegExp::Flags flags, Handle<String> pattern, | 6623 JSRegExp::Flags flags, Handle<String> pattern, |
| 6613 Handle<String> sample_subject, bool is_one_byte) { | 6624 Handle<String> sample_subject, bool is_one_byte) { |
| 6614 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 6625 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
| 6615 return IrregexpRegExpTooBig(isolate); | 6626 return IrregexpRegExpTooBig(isolate); |
| 6616 } | 6627 } |
| 6617 bool ignore_case = flags & JSRegExp::kIgnoreCase; | |
| 6618 bool is_sticky = flags & JSRegExp::kSticky; | 6628 bool is_sticky = flags & JSRegExp::kSticky; |
| 6619 bool is_global = flags & JSRegExp::kGlobal; | 6629 bool is_global = flags & JSRegExp::kGlobal; |
| 6620 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, | 6630 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, |
| 6621 is_one_byte); | 6631 is_one_byte); |
| 6622 | 6632 |
| 6623 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); | 6633 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); |
| 6624 | 6634 |
| 6625 // Sample some characters from the middle of the string. | 6635 // Sample some characters from the middle of the string. |
| 6626 static const int kSampleSize = 128; | 6636 static const int kSampleSize = 128; |
| 6627 | 6637 |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 6656 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 6666 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); |
| 6657 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 6667 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
| 6658 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( | 6668 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
| 6659 new (zone) RegExpCharacterClass('*'), false, loop_node))); | 6669 new (zone) RegExpCharacterClass('*'), false, loop_node))); |
| 6660 node = first_step_node; | 6670 node = first_step_node; |
| 6661 } else { | 6671 } else { |
| 6662 node = loop_node; | 6672 node = loop_node; |
| 6663 } | 6673 } |
| 6664 } | 6674 } |
| 6665 if (is_one_byte) { | 6675 if (is_one_byte) { |
| 6666 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6676 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler); |
| 6667 // Do it again to propagate the new nodes to places where they were not | 6677 // Do it again to propagate the new nodes to places where they were not |
| 6668 // put because they had not been calculated yet. | 6678 // put because they had not been calculated yet. |
| 6669 if (node != NULL) { | 6679 if (node != NULL) { |
| 6670 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); | 6680 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler); |
| 6671 } | 6681 } |
| 6672 } else if (compiler.unicode() && (is_global || is_sticky)) { | 6682 } else if (compiler.unicode() && (is_global || is_sticky)) { |
| 6673 node = OptionallyStepBackToLeadSurrogate(&compiler, node); | 6683 node = OptionallyStepBackToLeadSurrogate(&compiler, node); |
| 6674 } | 6684 } |
| 6675 | 6685 |
| 6676 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 6686 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |
| 6677 data->node = node; | 6687 data->node = node; |
| 6678 Analysis analysis(isolate, flags, is_one_byte); | 6688 Analysis analysis(isolate, flags, is_one_byte); |
| 6679 analysis.EnsureAnalyzed(node); | 6689 analysis.EnsureAnalyzed(node); |
| 6680 if (analysis.has_failed()) { | 6690 if (analysis.has_failed()) { |
| (...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6855 | 6865 |
| 6856 | 6866 |
| 6857 void RegExpResultsCache::Clear(FixedArray* cache) { | 6867 void RegExpResultsCache::Clear(FixedArray* cache) { |
| 6858 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6868 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
| 6859 cache->set(i, Smi::FromInt(0)); | 6869 cache->set(i, Smi::FromInt(0)); |
| 6860 } | 6870 } |
| 6861 } | 6871 } |
| 6862 | 6872 |
| 6863 } // namespace internal | 6873 } // namespace internal |
| 6864 } // namespace v8 | 6874 } // namespace v8 |
| OLD | NEW |