| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/jsregexp.h" | 5 #include "src/regexp/jsregexp.h" |
| 6 | 6 |
| 7 #include "src/ast/ast.h" | 7 #include "src/ast/ast.h" |
| 8 #include "src/base/platform/platform.h" | 8 #include "src/base/platform/platform.h" |
| 9 #include "src/compilation-cache.h" | 9 #include "src/compilation-cache.h" |
| 10 #include "src/compiler.h" | 10 #include "src/compiler.h" |
| (...skipping 1580 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1591 macro_assembler->IfRegisterLT(guard->reg(), | 1591 macro_assembler->IfRegisterLT(guard->reg(), |
| 1592 guard->value(), | 1592 guard->value(), |
| 1593 trace->backtrack()); | 1593 trace->backtrack()); |
| 1594 break; | 1594 break; |
| 1595 } | 1595 } |
| 1596 } | 1596 } |
| 1597 | 1597 |
| 1598 | 1598 |
| 1599 // Returns the number of characters in the equivalence class, omitting those | 1599 // Returns the number of characters in the equivalence class, omitting those |
| 1600 // that cannot occur in the source string because it is Latin1. | 1600 // that cannot occur in the source string because it is Latin1. |
| 1601 static int GetCaseIndependentLetters(RegExpCompiler* compiler, uc16 character, | 1601 static int GetCaseIndependentLetters(Isolate* isolate, uc16 character, |
| 1602 uc32* letters) { | 1602 bool one_byte_subject, |
| 1603 int length; | 1603 unibrow::uchar* letters) { |
| 1604 #ifdef V8_I18N_SUPPORT | 1604 int length = |
| 1605 if (compiler->unicode()) { | 1605 isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); |
| 1606 USet* set = uset_open(character, character); | 1606 // Unibrow returns 0 or 1 for characters where case independence is |
| 1607 uset_closeOver(set, USET_CASE_INSENSITIVE); | 1607 // trivial. |
| 1608 uset_removeAllStrings(set); | 1608 if (length == 0) { |
| 1609 length = uset_size(set); | 1609 letters[0] = character; |
| 1610 for (int i = 0; i < length; i++) { | 1610 length = 1; |
| 1611 letters[i] = uset_charAt(set, i); | |
| 1612 } | |
| 1613 uset_close(set); | |
| 1614 } else // NOLINT | |
| 1615 // Fallback in case ICU is not included. | |
| 1616 #endif // V8_I18N_SUPPORT | |
| 1617 { | |
| 1618 length = compiler->isolate()->jsregexp_uncanonicalize()->get(character, | |
| 1619 '\0', letters); | |
| 1620 // Unibrow returns 0 or 1 for characters where case independence is | |
| 1621 // trivial. | |
| 1622 if (length == 0) { | |
| 1623 letters[0] = character; | |
| 1624 length = 1; | |
| 1625 } | |
| 1626 } | 1611 } |
| 1627 | 1612 |
| 1628 if (compiler->one_byte()) { | 1613 if (one_byte_subject) { |
| 1629 int new_length = 0; | 1614 int new_length = 0; |
| 1630 for (int i = 0; i < length; i++) { | 1615 for (int i = 0; i < length; i++) { |
| 1631 if (letters[i] <= String::kMaxOneByteCharCode) { | 1616 if (letters[i] <= String::kMaxOneByteCharCode) { |
| 1632 letters[new_length++] = letters[i]; | 1617 letters[new_length++] = letters[i]; |
| 1633 } | 1618 } |
| 1634 } | 1619 } |
| 1635 length = new_length; | 1620 length = new_length; |
| 1636 } | 1621 } |
| 1637 | 1622 |
| 1638 return length; | 1623 return length; |
| 1639 } | 1624 } |
| 1640 | 1625 |
| 1641 static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, uc16 c, | 1626 |
| 1642 Label* on_failure, int cp_offset, | 1627 static inline bool EmitSimpleCharacter(Isolate* isolate, |
| 1643 bool check, bool preloaded) { | 1628 RegExpCompiler* compiler, |
| 1629 uc16 c, |
| 1630 Label* on_failure, |
| 1631 int cp_offset, |
| 1632 bool check, |
| 1633 bool preloaded) { |
| 1644 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 1634 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 1645 bool bound_checked = false; | 1635 bool bound_checked = false; |
| 1646 if (!preloaded) { | 1636 if (!preloaded) { |
| 1647 assembler->LoadCurrentCharacter( | 1637 assembler->LoadCurrentCharacter( |
| 1648 cp_offset, | 1638 cp_offset, |
| 1649 on_failure, | 1639 on_failure, |
| 1650 check); | 1640 check); |
| 1651 bound_checked = true; | 1641 bound_checked = true; |
| 1652 } | 1642 } |
| 1653 assembler->CheckNotCharacter(c, on_failure); | 1643 assembler->CheckNotCharacter(c, on_failure); |
| 1654 return bound_checked; | 1644 return bound_checked; |
| 1655 } | 1645 } |
| 1656 | 1646 |
| 1657 | 1647 |
| 1658 // Only emits non-letters (things that don't have case). Only used for case | 1648 // Only emits non-letters (things that don't have case). Only used for case |
| 1659 // independent matches. | 1649 // independent matches. |
| 1660 static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, uc16 c, | 1650 static inline bool EmitAtomNonLetter(Isolate* isolate, |
| 1661 Label* on_failure, int cp_offset, | 1651 RegExpCompiler* compiler, |
| 1662 bool check, bool preloaded) { | 1652 uc16 c, |
| 1653 Label* on_failure, |
| 1654 int cp_offset, |
| 1655 bool check, |
| 1656 bool preloaded) { |
| 1663 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1657 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1658 bool one_byte = compiler->one_byte(); |
| 1664 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1659 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1665 int length = GetCaseIndependentLetters(compiler, c, chars); | 1660 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); |
| 1666 if (length < 1) { | 1661 if (length < 1) { |
| 1667 // This can't match. Must be an one-byte subject and a non-one-byte | 1662 // This can't match. Must be an one-byte subject and a non-one-byte |
| 1668 // character. We do not need to do anything since the one-byte pass | 1663 // character. We do not need to do anything since the one-byte pass |
| 1669 // already handled this. | 1664 // already handled this. |
| 1670 return false; // Bounds not checked. | 1665 return false; // Bounds not checked. |
| 1671 } | 1666 } |
| 1672 bool checked = false; | 1667 bool checked = false; |
| 1673 // We handle the length > 1 case in a later pass. | 1668 // We handle the length > 1 case in a later pass. |
| 1674 if (length == 1) { | 1669 if (length == 1) { |
| 1675 if (compiler->one_byte() && c > String::kMaxOneByteCharCodeU) { | 1670 if (one_byte && c > String::kMaxOneByteCharCodeU) { |
| 1676 // This cannot match. | 1671 // Can't match - see above. |
| 1677 return false; // Bounds not checked. | 1672 return false; // Bounds not checked. |
| 1678 } | 1673 } |
| 1679 if (!preloaded) { | 1674 if (!preloaded) { |
| 1680 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1675 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 1681 checked = check; | 1676 checked = check; |
| 1682 } | 1677 } |
| 1683 macro_assembler->CheckNotCharacter(c, on_failure); | 1678 macro_assembler->CheckNotCharacter(c, on_failure); |
| 1684 } | 1679 } |
| 1685 return checked; | 1680 return checked; |
| 1686 } | 1681 } |
| (...skipping 28 matching lines...) Expand all Loading... |
| 1715 uc16 mask = char_mask ^ diff; | 1710 uc16 mask = char_mask ^ diff; |
| 1716 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, | 1711 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, |
| 1717 diff, | 1712 diff, |
| 1718 mask, | 1713 mask, |
| 1719 on_failure); | 1714 on_failure); |
| 1720 return true; | 1715 return true; |
| 1721 } | 1716 } |
| 1722 return false; | 1717 return false; |
| 1723 } | 1718 } |
| 1724 | 1719 |
| 1725 typedef bool EmitCharacterFunction(RegExpCompiler* compiler, uc16 c, | 1720 |
| 1726 Label* on_failure, int cp_offset, bool check, | 1721 typedef bool EmitCharacterFunction(Isolate* isolate, |
| 1722 RegExpCompiler* compiler, |
| 1723 uc16 c, |
| 1724 Label* on_failure, |
| 1725 int cp_offset, |
| 1726 bool check, |
| 1727 bool preloaded); | 1727 bool preloaded); |
| 1728 | 1728 |
| 1729 // Only emits letters (things that have case). Only used for case independent | 1729 // Only emits letters (things that have case). Only used for case independent |
| 1730 // matches. | 1730 // matches. |
| 1731 static inline bool EmitAtomLetter(RegExpCompiler* compiler, uc16 c, | 1731 static inline bool EmitAtomLetter(Isolate* isolate, |
| 1732 Label* on_failure, int cp_offset, bool check, | 1732 RegExpCompiler* compiler, |
| 1733 uc16 c, |
| 1734 Label* on_failure, |
| 1735 int cp_offset, |
| 1736 bool check, |
| 1733 bool preloaded) { | 1737 bool preloaded) { |
| 1734 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 1738 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 1739 bool one_byte = compiler->one_byte(); |
| 1735 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 1740 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 1736 int length = GetCaseIndependentLetters(compiler, c, chars); | 1741 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); |
| 1737 if (length <= 1) return false; | 1742 if (length <= 1) return false; |
| 1738 // We may not need to check against the end of the input string | 1743 // We may not need to check against the end of the input string |
| 1739 // if this character lies before a character that matched. | 1744 // if this character lies before a character that matched. |
| 1740 if (!preloaded) { | 1745 if (!preloaded) { |
| 1741 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); | 1746 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| 1742 } | 1747 } |
| 1743 Label ok; | 1748 Label ok; |
| 1744 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); | 1749 DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
| 1745 switch (length) { | 1750 switch (length) { |
| 1746 case 2: { | 1751 case 2: { |
| 1747 if (ShortCutEmitCharacterPair(macro_assembler, compiler->one_byte(), | 1752 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0], |
| 1748 chars[0], chars[1], on_failure)) { | 1753 chars[1], on_failure)) { |
| 1749 } else { | 1754 } else { |
| 1750 macro_assembler->CheckCharacter(chars[0], &ok); | 1755 macro_assembler->CheckCharacter(chars[0], &ok); |
| 1751 macro_assembler->CheckNotCharacter(chars[1], on_failure); | 1756 macro_assembler->CheckNotCharacter(chars[1], on_failure); |
| 1752 macro_assembler->Bind(&ok); | 1757 macro_assembler->Bind(&ok); |
| 1753 } | 1758 } |
| 1754 break; | 1759 break; |
| 1755 } | 1760 } |
| 1756 case 4: | 1761 case 4: |
| 1757 macro_assembler->CheckCharacter(chars[3], &ok); | 1762 macro_assembler->CheckCharacter(chars[3], &ok); |
| 1758 // Fall through! | 1763 // Fall through! |
| (...skipping 516 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2275 int ActionNode::EatsAtLeast(int still_to_find, | 2280 int ActionNode::EatsAtLeast(int still_to_find, |
| 2276 int budget, | 2281 int budget, |
| 2277 bool not_at_start) { | 2282 bool not_at_start) { |
| 2278 if (budget <= 0) return 0; | 2283 if (budget <= 0) return 0; |
| 2279 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! | 2284 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! |
| 2280 return on_success()->EatsAtLeast(still_to_find, | 2285 return on_success()->EatsAtLeast(still_to_find, |
| 2281 budget - 1, | 2286 budget - 1, |
| 2282 not_at_start); | 2287 not_at_start); |
| 2283 } | 2288 } |
| 2284 | 2289 |
| 2285 void ActionNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget, | 2290 |
| 2291 void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| 2286 BoyerMooreLookahead* bm, bool not_at_start) { | 2292 BoyerMooreLookahead* bm, bool not_at_start) { |
| 2287 if (action_type_ == BEGIN_SUBMATCH) { | 2293 if (action_type_ == BEGIN_SUBMATCH) { |
| 2288 bm->SetRest(offset); | 2294 bm->SetRest(offset); |
| 2289 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { | 2295 } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
| 2290 on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); | 2296 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
| 2291 } | 2297 } |
| 2292 SaveBMInfo(bm, not_at_start, offset); | 2298 SaveBMInfo(bm, not_at_start, offset); |
| 2293 } | 2299 } |
| 2294 | 2300 |
| 2295 | 2301 |
| 2296 int AssertionNode::EatsAtLeast(int still_to_find, | 2302 int AssertionNode::EatsAtLeast(int still_to_find, |
| 2297 int budget, | 2303 int budget, |
| 2298 bool not_at_start) { | 2304 bool not_at_start) { |
| 2299 if (budget <= 0) return 0; | 2305 if (budget <= 0) return 0; |
| 2300 // If we know we are not at the start and we are asked "how many characters | 2306 // If we know we are not at the start and we are asked "how many characters |
| 2301 // will you match if you succeed?" then we can answer anything since false | 2307 // will you match if you succeed?" then we can answer anything since false |
| 2302 // implies false. So lets just return the max answer (still_to_find) since | 2308 // implies false. So lets just return the max answer (still_to_find) since |
| 2303 // that won't prevent us from preloading a lot of characters for the other | 2309 // that won't prevent us from preloading a lot of characters for the other |
| 2304 // branches in the node graph. | 2310 // branches in the node graph. |
| 2305 if (assertion_type() == AT_START && not_at_start) return still_to_find; | 2311 if (assertion_type() == AT_START && not_at_start) return still_to_find; |
| 2306 return on_success()->EatsAtLeast(still_to_find, | 2312 return on_success()->EatsAtLeast(still_to_find, |
| 2307 budget - 1, | 2313 budget - 1, |
| 2308 not_at_start); | 2314 not_at_start); |
| 2309 } | 2315 } |
| 2310 | 2316 |
| 2311 void AssertionNode::FillInBMInfo(RegExpCompiler* compiler, int offset, | 2317 |
| 2312 int budget, BoyerMooreLookahead* bm, | 2318 void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| 2313 bool not_at_start) { | 2319 BoyerMooreLookahead* bm, bool not_at_start) { |
| 2314 // Match the behaviour of EatsAtLeast on this node. | 2320 // Match the behaviour of EatsAtLeast on this node. |
| 2315 if (assertion_type() == AT_START && not_at_start) return; | 2321 if (assertion_type() == AT_START && not_at_start) return; |
| 2316 on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); | 2322 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
| 2317 SaveBMInfo(bm, not_at_start, offset); | 2323 SaveBMInfo(bm, not_at_start, offset); |
| 2318 } | 2324 } |
| 2319 | 2325 |
| 2320 | 2326 |
| 2321 int BackReferenceNode::EatsAtLeast(int still_to_find, | 2327 int BackReferenceNode::EatsAtLeast(int still_to_find, |
| 2322 int budget, | 2328 int budget, |
| 2323 bool not_at_start) { | 2329 bool not_at_start) { |
| 2324 if (read_backward()) return 0; | 2330 if (read_backward()) return 0; |
| 2325 if (budget <= 0) return 0; | 2331 if (budget <= 0) return 0; |
| 2326 return on_success()->EatsAtLeast(still_to_find, | 2332 return on_success()->EatsAtLeast(still_to_find, |
| (...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2520 // The masks and values for the positions will be combined into a single | 2526 // The masks and values for the positions will be combined into a single |
| 2521 // machine word for the current character width in order to be used in | 2527 // machine word for the current character width in order to be used in |
| 2522 // generating a quick check. | 2528 // generating a quick check. |
| 2523 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2529 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2524 RegExpCompiler* compiler, | 2530 RegExpCompiler* compiler, |
| 2525 int characters_filled_in, | 2531 int characters_filled_in, |
| 2526 bool not_at_start) { | 2532 bool not_at_start) { |
| 2527 // Do not collect any quick check details if the text node reads backward, | 2533 // Do not collect any quick check details if the text node reads backward, |
| 2528 // since it reads in the opposite direction than we use for quick checks. | 2534 // since it reads in the opposite direction than we use for quick checks. |
| 2529 if (read_backward()) return; | 2535 if (read_backward()) return; |
| 2536 Isolate* isolate = compiler->macro_assembler()->isolate(); |
| 2530 DCHECK(characters_filled_in < details->characters()); | 2537 DCHECK(characters_filled_in < details->characters()); |
| 2531 int characters = details->characters(); | 2538 int characters = details->characters(); |
| 2532 int char_mask; | 2539 int char_mask; |
| 2533 if (compiler->one_byte()) { | 2540 if (compiler->one_byte()) { |
| 2534 char_mask = String::kMaxOneByteCharCode; | 2541 char_mask = String::kMaxOneByteCharCode; |
| 2535 } else { | 2542 } else { |
| 2536 char_mask = String::kMaxUtf16CodeUnit; | 2543 char_mask = String::kMaxUtf16CodeUnit; |
| 2537 } | 2544 } |
| 2538 for (int k = 0; k < elements()->length(); k++) { | 2545 for (int k = 0; k < elements()->length(); k++) { |
| 2539 TextElement elm = elements()->at(k); | 2546 TextElement elm = elements()->at(k); |
| 2540 if (elm.text_type() == TextElement::ATOM) { | 2547 if (elm.text_type() == TextElement::ATOM) { |
| 2541 Vector<const uc16> quarks = elm.atom()->data(); | 2548 Vector<const uc16> quarks = elm.atom()->data(); |
| 2542 for (int i = 0; i < characters && i < quarks.length(); i++) { | 2549 for (int i = 0; i < characters && i < quarks.length(); i++) { |
| 2543 QuickCheckDetails::Position* pos = | 2550 QuickCheckDetails::Position* pos = |
| 2544 details->positions(characters_filled_in); | 2551 details->positions(characters_filled_in); |
| 2545 uc16 c = quarks[i]; | 2552 uc16 c = quarks[i]; |
| 2546 if (compiler->ignore_case()) { | 2553 if (compiler->ignore_case()) { |
| 2547 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 2554 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 2548 int length = GetCaseIndependentLetters(compiler, c, chars); | 2555 int length = GetCaseIndependentLetters(isolate, c, |
| 2556 compiler->one_byte(), chars); |
| 2549 if (length == 0) { | 2557 if (length == 0) { |
| 2550 // This can happen because all case variants are non-Latin1, but we | 2558 // This can happen because all case variants are non-Latin1, but we |
| 2551 // know the input is Latin1. | 2559 // know the input is Latin1. |
| 2552 details->set_cannot_match(); | 2560 details->set_cannot_match(); |
| 2553 pos->determines_perfectly = false; | 2561 pos->determines_perfectly = false; |
| 2554 return; | 2562 return; |
| 2555 } | 2563 } |
| 2556 if (length == 1) { | 2564 if (length == 1) { |
| 2557 // This letter has no case equivalents, so it's nice and simple | 2565 // This letter has no case equivalents, so it's nice and simple |
| 2558 // and the mask-compare will determine definitely whether we have | 2566 // and the mask-compare will determine definitely whether we have |
| (...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2743 DCHECK(!info->visited); | 2751 DCHECK(!info->visited); |
| 2744 info->visited = true; | 2752 info->visited = true; |
| 2745 } | 2753 } |
| 2746 ~VisitMarker() { | 2754 ~VisitMarker() { |
| 2747 info_->visited = false; | 2755 info_->visited = false; |
| 2748 } | 2756 } |
| 2749 private: | 2757 private: |
| 2750 NodeInfo* info_; | 2758 NodeInfo* info_; |
| 2751 }; | 2759 }; |
| 2752 | 2760 |
| 2753 RegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpCompiler* compiler) { | 2761 |
| 2762 RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) { |
| 2754 if (info()->replacement_calculated) return replacement(); | 2763 if (info()->replacement_calculated) return replacement(); |
| 2755 if (depth < 0) return this; | 2764 if (depth < 0) return this; |
| 2756 DCHECK(!info()->visited); | 2765 DCHECK(!info()->visited); |
| 2757 VisitMarker marker(info()); | 2766 VisitMarker marker(info()); |
| 2758 return FilterSuccessor(depth - 1, compiler); | 2767 return FilterSuccessor(depth - 1, ignore_case); |
| 2759 } | 2768 } |
| 2760 | 2769 |
| 2761 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, | 2770 |
| 2762 RegExpCompiler* compiler) { | 2771 RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { |
| 2763 RegExpNode* next = on_success_->FilterOneByte(depth - 1, compiler); | 2772 RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); |
| 2764 if (next == NULL) return set_replacement(NULL); | 2773 if (next == NULL) return set_replacement(NULL); |
| 2765 on_success_ = next; | 2774 on_success_ = next; |
| 2766 return set_replacement(this); | 2775 return set_replacement(this); |
| 2767 } | 2776 } |
| 2768 | 2777 |
| 2769 | 2778 |
| 2770 // We need to check for the following characters: 0x39c 0x3bc 0x178. | 2779 // We need to check for the following characters: 0x39c 0x3bc 0x178. |
| 2771 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { | 2780 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) { |
| 2772 // TODO(dcarney): this could be a lot more efficient. | 2781 // TODO(dcarney): this could be a lot more efficient. |
| 2773 return range.Contains(0x39c) || | 2782 return range.Contains(0x39c) || |
| 2774 range.Contains(0x3bc) || range.Contains(0x178); | 2783 range.Contains(0x3bc) || range.Contains(0x178); |
| 2775 } | 2784 } |
| 2776 | 2785 |
| 2777 | 2786 |
| 2778 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { | 2787 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { |
| 2779 for (int i = 0; i < ranges->length(); i++) { | 2788 for (int i = 0; i < ranges->length(); i++) { |
| 2780 // TODO(dcarney): this could be a lot more efficient. | 2789 // TODO(dcarney): this could be a lot more efficient. |
| 2781 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; | 2790 if (RangeContainsLatin1Equivalents(ranges->at(i))) return true; |
| 2782 } | 2791 } |
| 2783 return false; | 2792 return false; |
| 2784 } | 2793 } |
| 2785 | 2794 |
| 2786 static uc16 ConvertNonLatin1ToEquivalentLatin1(bool unicode, uc16 c) { | |
| 2787 #ifdef V8_I18N_SUPPORT | |
| 2788 if (unicode) { | |
| 2789 USet* set = uset_open(c, c); | |
| 2790 uset_closeOver(set, USET_CASE_INSENSITIVE); | |
| 2791 uset_removeAllStrings(set); | |
| 2792 int length = uset_size(set); | |
| 2793 uc16 result = 0; | |
| 2794 for (int i = 0; i < length; i++) { | |
| 2795 uc32 c = uset_charAt(set, i); | |
| 2796 if (c <= String::kMaxOneByteCharCode) { | |
| 2797 result = static_cast<uc16>(c); | |
| 2798 break; | |
| 2799 } | |
| 2800 } | |
| 2801 uset_close(set); | |
| 2802 return result; | |
| 2803 } | |
| 2804 // Fallback to unibrow if ICU is not included. | |
| 2805 #endif // V8_I18N_SUPPORT | |
| 2806 return unibrow::Latin1::ConvertNonLatin1ToLatin1(c); | |
| 2807 } | |
| 2808 | 2795 |
| 2809 RegExpNode* TextNode::FilterOneByte(int depth, RegExpCompiler* compiler) { | 2796 RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { |
| 2810 if (info()->replacement_calculated) return replacement(); | 2797 if (info()->replacement_calculated) return replacement(); |
| 2811 if (depth < 0) return this; | 2798 if (depth < 0) return this; |
| 2812 DCHECK(!info()->visited); | 2799 DCHECK(!info()->visited); |
| 2813 VisitMarker marker(info()); | 2800 VisitMarker marker(info()); |
| 2814 int element_count = elements()->length(); | 2801 int element_count = elements()->length(); |
| 2815 for (int i = 0; i < element_count; i++) { | 2802 for (int i = 0; i < element_count; i++) { |
| 2816 TextElement elm = elements()->at(i); | 2803 TextElement elm = elements()->at(i); |
| 2817 if (elm.text_type() == TextElement::ATOM) { | 2804 if (elm.text_type() == TextElement::ATOM) { |
| 2818 Vector<const uc16> quarks = elm.atom()->data(); | 2805 Vector<const uc16> quarks = elm.atom()->data(); |
| 2819 for (int j = 0; j < quarks.length(); j++) { | 2806 for (int j = 0; j < quarks.length(); j++) { |
| 2820 uc16 c = quarks[j]; | 2807 uint16_t c = quarks[j]; |
| 2821 if (c <= String::kMaxOneByteCharCode) continue; | 2808 if (c <= String::kMaxOneByteCharCode) continue; |
| 2822 if (!compiler->ignore_case()) return set_replacement(NULL); | 2809 if (!ignore_case) return set_replacement(NULL); |
| 2823 // Here, we need to check for characters whose upper and lower cases | 2810 // Here, we need to check for characters whose upper and lower cases |
| 2824 // are outside the Latin-1 range. | 2811 // are outside the Latin-1 range. |
| 2825 uc16 converted = | 2812 uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
| 2826 ConvertNonLatin1ToEquivalentLatin1(compiler->unicode(), c); | |
| 2827 // Character is outside Latin-1 completely | 2813 // Character is outside Latin-1 completely |
| 2828 if (converted == 0) return set_replacement(NULL); | 2814 if (converted == 0) return set_replacement(NULL); |
| 2829 // Convert quark to Latin-1 in place. | 2815 // Convert quark to Latin-1 in place. |
| 2830 uc16* copy = const_cast<uc16*>(quarks.start()); | 2816 uint16_t* copy = const_cast<uint16_t*>(quarks.start()); |
| 2831 copy[j] = converted; | 2817 copy[j] = converted; |
| 2832 } | 2818 } |
| 2833 } else { | 2819 } else { |
| 2834 DCHECK(elm.text_type() == TextElement::CHAR_CLASS); | 2820 DCHECK(elm.text_type() == TextElement::CHAR_CLASS); |
| 2835 RegExpCharacterClass* cc = elm.char_class(); | 2821 RegExpCharacterClass* cc = elm.char_class(); |
| 2836 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); | 2822 ZoneList<CharacterRange>* ranges = cc->ranges(zone()); |
| 2837 CharacterRange::Canonicalize(ranges); | 2823 CharacterRange::Canonicalize(ranges); |
| 2838 // Now they are in order so we only need to look at the first. | 2824 // Now they are in order so we only need to look at the first. |
| 2839 int range_count = ranges->length(); | 2825 int range_count = ranges->length(); |
| 2840 if (cc->is_negated()) { | 2826 if (cc->is_negated()) { |
| 2841 if (range_count != 0 && | 2827 if (range_count != 0 && |
| 2842 ranges->at(0).from() == 0 && | 2828 ranges->at(0).from() == 0 && |
| 2843 ranges->at(0).to() >= String::kMaxOneByteCharCode) { | 2829 ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
| 2844 // This will be handled in a later filter. | 2830 // This will be handled in a later filter. |
| 2845 if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges)) | 2831 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
| 2846 continue; | |
| 2847 return set_replacement(NULL); | 2832 return set_replacement(NULL); |
| 2848 } | 2833 } |
| 2849 } else { | 2834 } else { |
| 2850 if (range_count == 0 || | 2835 if (range_count == 0 || |
| 2851 ranges->at(0).from() > String::kMaxOneByteCharCode) { | 2836 ranges->at(0).from() > String::kMaxOneByteCharCode) { |
| 2852 // This will be handled in a later filter. | 2837 // This will be handled in a later filter. |
| 2853 if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges)) | 2838 if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
| 2854 continue; | |
| 2855 return set_replacement(NULL); | 2839 return set_replacement(NULL); |
| 2856 } | 2840 } |
| 2857 } | 2841 } |
| 2858 } | 2842 } |
| 2859 } | 2843 } |
| 2860 return FilterSuccessor(depth - 1, compiler); | 2844 return FilterSuccessor(depth - 1, ignore_case); |
| 2861 } | 2845 } |
| 2862 | 2846 |
| 2863 RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { | 2847 |
| 2848 RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
| 2864 if (info()->replacement_calculated) return replacement(); | 2849 if (info()->replacement_calculated) return replacement(); |
| 2865 if (depth < 0) return this; | 2850 if (depth < 0) return this; |
| 2866 if (info()->visited) return this; | 2851 if (info()->visited) return this; |
| 2867 { | 2852 { |
| 2868 VisitMarker marker(info()); | 2853 VisitMarker marker(info()); |
| 2869 | 2854 |
| 2870 RegExpNode* continue_replacement = | 2855 RegExpNode* continue_replacement = |
| 2871 continue_node_->FilterOneByte(depth - 1, compiler); | 2856 continue_node_->FilterOneByte(depth - 1, ignore_case); |
| 2872 // If we can't continue after the loop then there is no sense in doing the | 2857 // If we can't continue after the loop then there is no sense in doing the |
| 2873 // loop. | 2858 // loop. |
| 2874 if (continue_replacement == NULL) return set_replacement(NULL); | 2859 if (continue_replacement == NULL) return set_replacement(NULL); |
| 2875 } | 2860 } |
| 2876 | 2861 |
| 2877 return ChoiceNode::FilterOneByte(depth - 1, compiler); | 2862 return ChoiceNode::FilterOneByte(depth - 1, ignore_case); |
| 2878 } | 2863 } |
| 2879 | 2864 |
| 2880 RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { | 2865 |
| 2866 RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
| 2881 if (info()->replacement_calculated) return replacement(); | 2867 if (info()->replacement_calculated) return replacement(); |
| 2882 if (depth < 0) return this; | 2868 if (depth < 0) return this; |
| 2883 if (info()->visited) return this; | 2869 if (info()->visited) return this; |
| 2884 VisitMarker marker(info()); | 2870 VisitMarker marker(info()); |
| 2885 int choice_count = alternatives_->length(); | 2871 int choice_count = alternatives_->length(); |
| 2886 | 2872 |
| 2887 for (int i = 0; i < choice_count; i++) { | 2873 for (int i = 0; i < choice_count; i++) { |
| 2888 GuardedAlternative alternative = alternatives_->at(i); | 2874 GuardedAlternative alternative = alternatives_->at(i); |
| 2889 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { | 2875 if (alternative.guards() != NULL && alternative.guards()->length() != 0) { |
| 2890 set_replacement(this); | 2876 set_replacement(this); |
| 2891 return this; | 2877 return this; |
| 2892 } | 2878 } |
| 2893 } | 2879 } |
| 2894 | 2880 |
| 2895 int surviving = 0; | 2881 int surviving = 0; |
| 2896 RegExpNode* survivor = NULL; | 2882 RegExpNode* survivor = NULL; |
| 2897 for (int i = 0; i < choice_count; i++) { | 2883 for (int i = 0; i < choice_count; i++) { |
| 2898 GuardedAlternative alternative = alternatives_->at(i); | 2884 GuardedAlternative alternative = alternatives_->at(i); |
| 2899 RegExpNode* replacement = | 2885 RegExpNode* replacement = |
| 2900 alternative.node()->FilterOneByte(depth - 1, compiler); | 2886 alternative.node()->FilterOneByte(depth - 1, ignore_case); |
| 2901 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. | 2887 DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. |
| 2902 if (replacement != NULL) { | 2888 if (replacement != NULL) { |
| 2903 alternatives_->at(i).set_node(replacement); | 2889 alternatives_->at(i).set_node(replacement); |
| 2904 surviving++; | 2890 surviving++; |
| 2905 survivor = replacement; | 2891 survivor = replacement; |
| 2906 } | 2892 } |
| 2907 } | 2893 } |
| 2908 if (surviving < 2) return set_replacement(survivor); | 2894 if (surviving < 2) return set_replacement(survivor); |
| 2909 | 2895 |
| 2910 set_replacement(this); | 2896 set_replacement(this); |
| 2911 if (surviving == choice_count) { | 2897 if (surviving == choice_count) { |
| 2912 return this; | 2898 return this; |
| 2913 } | 2899 } |
| 2914 // Only some of the nodes survived the filtering. We need to rebuild the | 2900 // Only some of the nodes survived the filtering. We need to rebuild the |
| 2915 // alternatives list. | 2901 // alternatives list. |
| 2916 ZoneList<GuardedAlternative>* new_alternatives = | 2902 ZoneList<GuardedAlternative>* new_alternatives = |
| 2917 new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); | 2903 new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); |
| 2918 for (int i = 0; i < choice_count; i++) { | 2904 for (int i = 0; i < choice_count; i++) { |
| 2919 RegExpNode* replacement = | 2905 RegExpNode* replacement = |
| 2920 alternatives_->at(i).node()->FilterOneByte(depth - 1, compiler); | 2906 alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case); |
| 2921 if (replacement != NULL) { | 2907 if (replacement != NULL) { |
| 2922 alternatives_->at(i).set_node(replacement); | 2908 alternatives_->at(i).set_node(replacement); |
| 2923 new_alternatives->Add(alternatives_->at(i), zone()); | 2909 new_alternatives->Add(alternatives_->at(i), zone()); |
| 2924 } | 2910 } |
| 2925 } | 2911 } |
| 2926 alternatives_ = new_alternatives; | 2912 alternatives_ = new_alternatives; |
| 2927 return this; | 2913 return this; |
| 2928 } | 2914 } |
| 2929 | 2915 |
| 2930 RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte( | 2916 |
| 2931 int depth, RegExpCompiler* compiler) { | 2917 RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth, |
| 2918 bool ignore_case) { |
| 2932 if (info()->replacement_calculated) return replacement(); | 2919 if (info()->replacement_calculated) return replacement(); |
| 2933 if (depth < 0) return this; | 2920 if (depth < 0) return this; |
| 2934 if (info()->visited) return this; | 2921 if (info()->visited) return this; |
| 2935 VisitMarker marker(info()); | 2922 VisitMarker marker(info()); |
| 2936 // Alternative 0 is the negative lookahead, alternative 1 is what comes | 2923 // Alternative 0 is the negative lookahead, alternative 1 is what comes |
| 2937 // afterwards. | 2924 // afterwards. |
| 2938 RegExpNode* node = alternatives_->at(1).node(); | 2925 RegExpNode* node = alternatives_->at(1).node(); |
| 2939 RegExpNode* replacement = node->FilterOneByte(depth - 1, compiler); | 2926 RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); |
| 2940 if (replacement == NULL) return set_replacement(NULL); | 2927 if (replacement == NULL) return set_replacement(NULL); |
| 2941 alternatives_->at(1).set_node(replacement); | 2928 alternatives_->at(1).set_node(replacement); |
| 2942 | 2929 |
| 2943 RegExpNode* neg_node = alternatives_->at(0).node(); | 2930 RegExpNode* neg_node = alternatives_->at(0).node(); |
| 2944 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, compiler); | 2931 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); |
| 2945 // If the negative lookahead is always going to fail then | 2932 // If the negative lookahead is always going to fail then |
| 2946 // we don't need to check it. | 2933 // we don't need to check it. |
| 2947 if (neg_replacement == NULL) return set_replacement(replacement); | 2934 if (neg_replacement == NULL) return set_replacement(replacement); |
| 2948 alternatives_->at(0).set_node(neg_replacement); | 2935 alternatives_->at(0).set_node(neg_replacement); |
| 2949 return set_replacement(this); | 2936 return set_replacement(this); |
| 2950 } | 2937 } |
| 2951 | 2938 |
| 2952 | 2939 |
| 2953 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2940 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2954 RegExpCompiler* compiler, | 2941 RegExpCompiler* compiler, |
| 2955 int characters_filled_in, | 2942 int characters_filled_in, |
| 2956 bool not_at_start) { | 2943 bool not_at_start) { |
| 2957 if (body_can_be_zero_length_ || info()->visited) return; | 2944 if (body_can_be_zero_length_ || info()->visited) return; |
| 2958 VisitMarker marker(info()); | 2945 VisitMarker marker(info()); |
| 2959 return ChoiceNode::GetQuickCheckDetails(details, | 2946 return ChoiceNode::GetQuickCheckDetails(details, |
| 2960 compiler, | 2947 compiler, |
| 2961 characters_filled_in, | 2948 characters_filled_in, |
| 2962 not_at_start); | 2949 not_at_start); |
| 2963 } | 2950 } |
| 2964 | 2951 |
| 2965 void LoopChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, | 2952 |
| 2966 int budget, BoyerMooreLookahead* bm, | 2953 void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| 2967 bool not_at_start) { | 2954 BoyerMooreLookahead* bm, bool not_at_start) { |
| 2968 if (body_can_be_zero_length_ || budget <= 0) { | 2955 if (body_can_be_zero_length_ || budget <= 0) { |
| 2969 bm->SetRest(offset); | 2956 bm->SetRest(offset); |
| 2970 SaveBMInfo(bm, not_at_start, offset); | 2957 SaveBMInfo(bm, not_at_start, offset); |
| 2971 return; | 2958 return; |
| 2972 } | 2959 } |
| 2973 ChoiceNode::FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); | 2960 ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
| 2974 SaveBMInfo(bm, not_at_start, offset); | 2961 SaveBMInfo(bm, not_at_start, offset); |
| 2975 } | 2962 } |
| 2976 | 2963 |
| 2977 | 2964 |
| 2978 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, | 2965 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| 2979 RegExpCompiler* compiler, | 2966 RegExpCompiler* compiler, |
| 2980 int characters_filled_in, | 2967 int characters_filled_in, |
| 2981 bool not_at_start) { | 2968 bool not_at_start) { |
| 2982 not_at_start = (not_at_start || not_at_start_); | 2969 not_at_start = (not_at_start || not_at_start_); |
| 2983 int choice_count = alternatives_->length(); | 2970 int choice_count = alternatives_->length(); |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3055 assembler->CheckNotCharacter('\r', new_trace.backtrack()); | 3042 assembler->CheckNotCharacter('\r', new_trace.backtrack()); |
| 3056 } | 3043 } |
| 3057 assembler->Bind(&ok); | 3044 assembler->Bind(&ok); |
| 3058 on_success->Emit(compiler, &new_trace); | 3045 on_success->Emit(compiler, &new_trace); |
| 3059 } | 3046 } |
| 3060 | 3047 |
| 3061 | 3048 |
| 3062 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). | 3049 // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
| 3063 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { | 3050 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
| 3064 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3051 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3052 Isolate* isolate = assembler->isolate(); |
| 3065 Trace::TriBool next_is_word_character = Trace::UNKNOWN; | 3053 Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
| 3066 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); | 3054 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
| 3067 BoyerMooreLookahead* lookahead = bm_info(not_at_start); | 3055 BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
| 3068 if (lookahead == NULL) { | 3056 if (lookahead == NULL) { |
| 3069 int eats_at_least = | 3057 int eats_at_least = |
| 3070 Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore, | 3058 Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore, |
| 3071 kRecursionBudget, | 3059 kRecursionBudget, |
| 3072 not_at_start)); | 3060 not_at_start)); |
| 3073 if (eats_at_least >= 1) { | 3061 if (eats_at_least >= 1) { |
| 3074 BoyerMooreLookahead* bm = | 3062 BoyerMooreLookahead* bm = |
| 3075 new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); | 3063 new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); |
| 3076 FillInBMInfo(compiler, 0, kRecursionBudget, bm, not_at_start); | 3064 FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start); |
| 3077 if (bm->at(0)->is_non_word()) | 3065 if (bm->at(0)->is_non_word()) |
| 3078 next_is_word_character = Trace::FALSE_VALUE; | 3066 next_is_word_character = Trace::FALSE_VALUE; |
| 3079 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; | 3067 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
| 3080 } | 3068 } |
| 3081 } else { | 3069 } else { |
| 3082 if (lookahead->at(0)->is_non_word()) | 3070 if (lookahead->at(0)->is_non_word()) |
| 3083 next_is_word_character = Trace::FALSE_VALUE; | 3071 next_is_word_character = Trace::FALSE_VALUE; |
| 3084 if (lookahead->at(0)->is_word()) | 3072 if (lookahead->at(0)->is_word()) |
| 3085 next_is_word_character = Trace::TRUE_VALUE; | 3073 next_is_word_character = Trace::TRUE_VALUE; |
| 3086 } | 3074 } |
| (...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3238 // up to the limit the quick check already checked. In addition the quick | 3226 // up to the limit the quick check already checked. In addition the quick |
| 3239 // check can have involved a mask and compare operation which may simplify | 3227 // check can have involved a mask and compare operation which may simplify |
| 3240 // or obviate the need for further checks at some character positions. | 3228 // or obviate the need for further checks at some character positions. |
| 3241 void TextNode::TextEmitPass(RegExpCompiler* compiler, | 3229 void TextNode::TextEmitPass(RegExpCompiler* compiler, |
| 3242 TextEmitPassType pass, | 3230 TextEmitPassType pass, |
| 3243 bool preloaded, | 3231 bool preloaded, |
| 3244 Trace* trace, | 3232 Trace* trace, |
| 3245 bool first_element_checked, | 3233 bool first_element_checked, |
| 3246 int* checked_up_to) { | 3234 int* checked_up_to) { |
| 3247 RegExpMacroAssembler* assembler = compiler->macro_assembler(); | 3235 RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| 3236 Isolate* isolate = assembler->isolate(); |
| 3248 bool one_byte = compiler->one_byte(); | 3237 bool one_byte = compiler->one_byte(); |
| 3249 Label* backtrack = trace->backtrack(); | 3238 Label* backtrack = trace->backtrack(); |
| 3250 QuickCheckDetails* quick_check = trace->quick_check_performed(); | 3239 QuickCheckDetails* quick_check = trace->quick_check_performed(); |
| 3251 int element_count = elements()->length(); | 3240 int element_count = elements()->length(); |
| 3252 int backward_offset = read_backward() ? -Length() : 0; | 3241 int backward_offset = read_backward() ? -Length() : 0; |
| 3253 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { | 3242 for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { |
| 3254 TextElement elm = elements()->at(i); | 3243 TextElement elm = elements()->at(i); |
| 3255 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; | 3244 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; |
| 3256 if (elm.text_type() == TextElement::ATOM) { | 3245 if (elm.text_type() == TextElement::ATOM) { |
| 3257 Vector<const uc16> quarks = elm.atom()->data(); | 3246 Vector<const uc16> quarks = elm.atom()->data(); |
| 3258 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { | 3247 for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { |
| 3259 if (first_element_checked && i == 0 && j == 0) continue; | 3248 if (first_element_checked && i == 0 && j == 0) continue; |
| 3260 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; | 3249 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; |
| 3261 EmitCharacterFunction* emit_function = NULL; | 3250 EmitCharacterFunction* emit_function = NULL; |
| 3262 switch (pass) { | 3251 switch (pass) { |
| 3263 case NON_LATIN1_MATCH: | 3252 case NON_LATIN1_MATCH: |
| 3264 DCHECK(one_byte); | 3253 DCHECK(one_byte); |
| 3265 DCHECK(!(compiler->unicode() && compiler->ignore_case())); | |
| 3266 if (quarks[j] > String::kMaxOneByteCharCode) { | 3254 if (quarks[j] > String::kMaxOneByteCharCode) { |
| 3267 assembler->GoTo(backtrack); | 3255 assembler->GoTo(backtrack); |
| 3268 return; | 3256 return; |
| 3269 } | 3257 } |
| 3270 break; | 3258 break; |
| 3271 case NON_LETTER_CHARACTER_MATCH: | 3259 case NON_LETTER_CHARACTER_MATCH: |
| 3272 emit_function = &EmitAtomNonLetter; | 3260 emit_function = &EmitAtomNonLetter; |
| 3273 break; | 3261 break; |
| 3274 case SIMPLE_CHARACTER_MATCH: | 3262 case SIMPLE_CHARACTER_MATCH: |
| 3275 emit_function = &EmitSimpleCharacter; | 3263 emit_function = &EmitSimpleCharacter; |
| 3276 break; | 3264 break; |
| 3277 case CASE_CHARACTER_MATCH: | 3265 case CASE_CHARACTER_MATCH: |
| 3278 emit_function = &EmitAtomLetter; | 3266 emit_function = &EmitAtomLetter; |
| 3279 break; | 3267 break; |
| 3280 default: | 3268 default: |
| 3281 break; | 3269 break; |
| 3282 } | 3270 } |
| 3283 if (emit_function != NULL) { | 3271 if (emit_function != NULL) { |
| 3284 bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); | 3272 bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); |
| 3285 bool bound_checked = | 3273 bool bound_checked = |
| 3286 emit_function(compiler, quarks[j], backtrack, cp_offset + j, | 3274 emit_function(isolate, compiler, quarks[j], backtrack, |
| 3287 bounds_check, preloaded); | 3275 cp_offset + j, bounds_check, preloaded); |
| 3288 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); | 3276 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
| 3289 } | 3277 } |
| 3290 } | 3278 } |
| 3291 } else { | 3279 } else { |
| 3292 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); | 3280 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type()); |
| 3293 if (pass == CHARACTER_CLASS_MATCH) { | 3281 if (pass == CHARACTER_CLASS_MATCH) { |
| 3294 if (first_element_checked && i == 0) continue; | 3282 if (first_element_checked && i == 0) continue; |
| 3295 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; | 3283 if (DeterminedAlready(quick_check, elm.cp_offset())) continue; |
| 3296 RegExpCharacterClass* cc = elm.char_class(); | 3284 RegExpCharacterClass* cc = elm.char_class(); |
| 3297 bool bounds_check = *checked_up_to < cp_offset || read_backward(); | 3285 bool bounds_check = *checked_up_to < cp_offset || read_backward(); |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3360 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { | 3348 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
| 3361 LimitResult limit_result = LimitVersions(compiler, trace); | 3349 LimitResult limit_result = LimitVersions(compiler, trace); |
| 3362 if (limit_result == DONE) return; | 3350 if (limit_result == DONE) return; |
| 3363 DCHECK(limit_result == CONTINUE); | 3351 DCHECK(limit_result == CONTINUE); |
| 3364 | 3352 |
| 3365 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { | 3353 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) { |
| 3366 compiler->SetRegExpTooBig(); | 3354 compiler->SetRegExpTooBig(); |
| 3367 return; | 3355 return; |
| 3368 } | 3356 } |
| 3369 | 3357 |
| 3370 if (compiler->one_byte() && | 3358 if (compiler->one_byte()) { |
| 3371 !(compiler->unicode() && compiler->ignore_case())) { | |
| 3372 // If any character within the text node is outside the Latin1 range, it | |
| 3373 // cannot possibly match anything in a one-byte string. This still holds | |
| 3374 // for case-insensitive non-unicode regexp patterns. However, for | |
| 3375 // case-insensitive unicode regexp patterns, this is no longer true, e.g. | |
| 3376 // /\u212b/ui matches "\u00c5". | |
| 3377 int dummy = 0; | 3359 int dummy = 0; |
| 3378 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); | 3360 TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); |
| 3379 } | 3361 } |
| 3380 | 3362 |
| 3381 bool first_elt_done = false; | 3363 bool first_elt_done = false; |
| 3382 int bound_checked_to = trace->cp_offset() - 1; | 3364 int bound_checked_to = trace->cp_offset() - 1; |
| 3383 bound_checked_to += trace->bound_checked_up_to(); | 3365 bound_checked_to += trace->bound_checked_up_to(); |
| 3384 | 3366 |
| 3385 // If a character is preloaded into the current character register then | 3367 // If a character is preloaded into the current character register then |
| 3386 // check that now. | 3368 // check that now. |
| (...skipping 731 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4118 | 4100 |
| 4119 // Really we should be creating a new trace when we execute this function, | 4101 // Really we should be creating a new trace when we execute this function, |
| 4120 // but there is no need, because the code it generates cannot backtrack, and | 4102 // but there is no need, because the code it generates cannot backtrack, and |
| 4121 // we always arrive here with a trivial trace (since it's the entry to a | 4103 // we always arrive here with a trivial trace (since it's the entry to a |
| 4122 // loop. That also implies that there are no preloaded characters, which is | 4104 // loop. That also implies that there are no preloaded characters, which is |
| 4123 // good, because it means we won't be violating any assumptions by | 4105 // good, because it means we won't be violating any assumptions by |
| 4124 // overwriting those characters with new load instructions. | 4106 // overwriting those characters with new load instructions. |
| 4125 DCHECK(trace->is_trivial()); | 4107 DCHECK(trace->is_trivial()); |
| 4126 | 4108 |
| 4127 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); | 4109 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| 4110 Isolate* isolate = macro_assembler->isolate(); |
| 4128 // At this point we know that we are at a non-greedy loop that will eat | 4111 // At this point we know that we are at a non-greedy loop that will eat |
| 4129 // any character one at a time. Any non-anchored regexp has such a | 4112 // any character one at a time. Any non-anchored regexp has such a |
| 4130 // loop prepended to it in order to find where it starts. We look for | 4113 // loop prepended to it in order to find where it starts. We look for |
| 4131 // a pattern of the form ...abc... where we can look 6 characters ahead | 4114 // a pattern of the form ...abc... where we can look 6 characters ahead |
| 4132 // and step forwards 3 if the character is not one of abc. Abc need | 4115 // and step forwards 3 if the character is not one of abc. Abc need |
| 4133 // not be atoms, they can be any reasonably limited character class or | 4116 // not be atoms, they can be any reasonably limited character class or |
| 4134 // small alternation. | 4117 // small alternation. |
| 4135 BoyerMooreLookahead* bm = bm_info(false); | 4118 BoyerMooreLookahead* bm = bm_info(false); |
| 4136 if (bm == NULL) { | 4119 if (bm == NULL) { |
| 4137 eats_at_least = Min(kMaxLookaheadForBoyerMoore, | 4120 eats_at_least = Min(kMaxLookaheadForBoyerMoore, |
| 4138 EatsAtLeast(kMaxLookaheadForBoyerMoore, | 4121 EatsAtLeast(kMaxLookaheadForBoyerMoore, |
| 4139 kRecursionBudget, | 4122 kRecursionBudget, |
| 4140 false)); | 4123 false)); |
| 4141 if (eats_at_least >= 1) { | 4124 if (eats_at_least >= 1) { |
| 4142 bm = new(zone()) BoyerMooreLookahead(eats_at_least, | 4125 bm = new(zone()) BoyerMooreLookahead(eats_at_least, |
| 4143 compiler, | 4126 compiler, |
| 4144 zone()); | 4127 zone()); |
| 4145 GuardedAlternative alt0 = alternatives_->at(0); | 4128 GuardedAlternative alt0 = alternatives_->at(0); |
| 4146 alt0.node()->FillInBMInfo(compiler, 0, kRecursionBudget, bm, false); | 4129 alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false); |
| 4147 } | 4130 } |
| 4148 } | 4131 } |
| 4149 if (bm != NULL) { | 4132 if (bm != NULL) { |
| 4150 bm->EmitSkipInstructions(macro_assembler); | 4133 bm->EmitSkipInstructions(macro_assembler); |
| 4151 } | 4134 } |
| 4152 return eats_at_least; | 4135 return eats_at_least; |
| 4153 } | 4136 } |
| 4154 | 4137 |
| 4155 | 4138 |
| 4156 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, | 4139 void ChoiceNode::EmitChoices(RegExpCompiler* compiler, |
| (...skipping 2241 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6398 | 6381 |
| 6399 void Analysis::VisitBackReference(BackReferenceNode* that) { | 6382 void Analysis::VisitBackReference(BackReferenceNode* that) { |
| 6400 EnsureAnalyzed(that->on_success()); | 6383 EnsureAnalyzed(that->on_success()); |
| 6401 } | 6384 } |
| 6402 | 6385 |
| 6403 | 6386 |
| 6404 void Analysis::VisitAssertion(AssertionNode* that) { | 6387 void Analysis::VisitAssertion(AssertionNode* that) { |
| 6405 EnsureAnalyzed(that->on_success()); | 6388 EnsureAnalyzed(that->on_success()); |
| 6406 } | 6389 } |
| 6407 | 6390 |
| 6408 void BackReferenceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, | 6391 |
| 6409 int budget, BoyerMooreLookahead* bm, | 6392 void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| 6393 BoyerMooreLookahead* bm, |
| 6410 bool not_at_start) { | 6394 bool not_at_start) { |
| 6411 // Working out the set of characters that a backreference can match is too | 6395 // Working out the set of characters that a backreference can match is too |
| 6412 // hard, so we just say that any character can match. | 6396 // hard, so we just say that any character can match. |
| 6413 bm->SetRest(offset); | 6397 bm->SetRest(offset); |
| 6414 SaveBMInfo(bm, not_at_start, offset); | 6398 SaveBMInfo(bm, not_at_start, offset); |
| 6415 } | 6399 } |
| 6416 | 6400 |
| 6417 | 6401 |
| 6418 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == | 6402 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == |
| 6419 RegExpMacroAssembler::kTableSize); | 6403 RegExpMacroAssembler::kTableSize); |
| 6420 | 6404 |
| 6421 void ChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget, | 6405 |
| 6406 void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| 6422 BoyerMooreLookahead* bm, bool not_at_start) { | 6407 BoyerMooreLookahead* bm, bool not_at_start) { |
| 6423 ZoneList<GuardedAlternative>* alts = alternatives(); | 6408 ZoneList<GuardedAlternative>* alts = alternatives(); |
| 6424 budget = (budget - 1) / alts->length(); | 6409 budget = (budget - 1) / alts->length(); |
| 6425 for (int i = 0; i < alts->length(); i++) { | 6410 for (int i = 0; i < alts->length(); i++) { |
| 6426 GuardedAlternative& alt = alts->at(i); | 6411 GuardedAlternative& alt = alts->at(i); |
| 6427 if (alt.guards() != NULL && alt.guards()->length() != 0) { | 6412 if (alt.guards() != NULL && alt.guards()->length() != 0) { |
| 6428 bm->SetRest(offset); // Give up trying to fill in info. | 6413 bm->SetRest(offset); // Give up trying to fill in info. |
| 6429 SaveBMInfo(bm, not_at_start, offset); | 6414 SaveBMInfo(bm, not_at_start, offset); |
| 6430 return; | 6415 return; |
| 6431 } | 6416 } |
| 6432 alt.node()->FillInBMInfo(compiler, offset, budget, bm, not_at_start); | 6417 alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start); |
| 6433 } | 6418 } |
| 6434 SaveBMInfo(bm, not_at_start, offset); | 6419 SaveBMInfo(bm, not_at_start, offset); |
| 6435 } | 6420 } |
| 6436 | 6421 |
| 6437 void TextNode::FillInBMInfo(RegExpCompiler* compiler, int initial_offset, | 6422 |
| 6438 int budget, BoyerMooreLookahead* bm, | 6423 void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget, |
| 6439 bool not_at_start) { | 6424 BoyerMooreLookahead* bm, bool not_at_start) { |
| 6440 if (initial_offset >= bm->length()) return; | 6425 if (initial_offset >= bm->length()) return; |
| 6441 int offset = initial_offset; | 6426 int offset = initial_offset; |
| 6442 int max_char = bm->max_char(); | 6427 int max_char = bm->max_char(); |
| 6443 for (int i = 0; i < elements()->length(); i++) { | 6428 for (int i = 0; i < elements()->length(); i++) { |
| 6444 if (offset >= bm->length()) { | 6429 if (offset >= bm->length()) { |
| 6445 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 6430 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 6446 return; | 6431 return; |
| 6447 } | 6432 } |
| 6448 TextElement text = elements()->at(i); | 6433 TextElement text = elements()->at(i); |
| 6449 if (text.text_type() == TextElement::ATOM) { | 6434 if (text.text_type() == TextElement::ATOM) { |
| 6450 RegExpAtom* atom = text.atom(); | 6435 RegExpAtom* atom = text.atom(); |
| 6451 for (int j = 0; j < atom->length(); j++, offset++) { | 6436 for (int j = 0; j < atom->length(); j++, offset++) { |
| 6452 if (offset >= bm->length()) { | 6437 if (offset >= bm->length()) { |
| 6453 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 6438 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 6454 return; | 6439 return; |
| 6455 } | 6440 } |
| 6456 uc16 character = atom->data()[j]; | 6441 uc16 character = atom->data()[j]; |
| 6457 if (bm->compiler()->ignore_case()) { | 6442 if (bm->compiler()->ignore_case()) { |
| 6458 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; | 6443 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| 6459 int length = GetCaseIndependentLetters(compiler, character, chars); | 6444 int length = GetCaseIndependentLetters( |
| 6445 isolate, character, bm->max_char() == String::kMaxOneByteCharCode, |
| 6446 chars); |
| 6460 for (int j = 0; j < length; j++) { | 6447 for (int j = 0; j < length; j++) { |
| 6461 bm->Set(offset, chars[j]); | 6448 bm->Set(offset, chars[j]); |
| 6462 } | 6449 } |
| 6463 } else { | 6450 } else { |
| 6464 if (character <= max_char) bm->Set(offset, character); | 6451 if (character <= max_char) bm->Set(offset, character); |
| 6465 } | 6452 } |
| 6466 } | 6453 } |
| 6467 } else { | 6454 } else { |
| 6468 DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type()); | 6455 DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type()); |
| 6469 RegExpCharacterClass* char_class = text.char_class(); | 6456 RegExpCharacterClass* char_class = text.char_class(); |
| 6470 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); | 6457 ZoneList<CharacterRange>* ranges = char_class->ranges(zone()); |
| 6471 if (char_class->is_negated()) { | 6458 if (char_class->is_negated()) { |
| 6472 bm->SetAll(offset); | 6459 bm->SetAll(offset); |
| 6473 } else { | 6460 } else { |
| 6474 for (int k = 0; k < ranges->length(); k++) { | 6461 for (int k = 0; k < ranges->length(); k++) { |
| 6475 CharacterRange& range = ranges->at(k); | 6462 CharacterRange& range = ranges->at(k); |
| 6476 if (range.from() > max_char) continue; | 6463 if (range.from() > max_char) continue; |
| 6477 int to = Min(max_char, static_cast<int>(range.to())); | 6464 int to = Min(max_char, static_cast<int>(range.to())); |
| 6478 bm->SetInterval(offset, Interval(range.from(), to)); | 6465 bm->SetInterval(offset, Interval(range.from(), to)); |
| 6479 } | 6466 } |
| 6480 } | 6467 } |
| 6481 offset++; | 6468 offset++; |
| 6482 } | 6469 } |
| 6483 } | 6470 } |
| 6484 if (offset >= bm->length()) { | 6471 if (offset >= bm->length()) { |
| 6485 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 6472 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 6486 return; | 6473 return; |
| 6487 } | 6474 } |
| 6488 on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, | 6475 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, |
| 6489 true); // Not at start after a text node. | 6476 true); // Not at start after a text node. |
| 6490 if (initial_offset == 0) set_bm_info(not_at_start, bm); | 6477 if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| 6491 } | 6478 } |
| 6492 | 6479 |
| 6493 | 6480 |
| 6494 // ------------------------------------------------------------------- | 6481 // ------------------------------------------------------------------- |
| 6495 // Dispatch table construction | 6482 // Dispatch table construction |
| 6496 | 6483 |
| 6497 | 6484 |
| 6498 void DispatchTableConstructor::VisitEnd(EndNode* that) { | 6485 void DispatchTableConstructor::VisitEnd(EndNode* that) { |
| (...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6636 } | 6623 } |
| 6637 | 6624 |
| 6638 | 6625 |
| 6639 RegExpEngine::CompilationResult RegExpEngine::Compile( | 6626 RegExpEngine::CompilationResult RegExpEngine::Compile( |
| 6640 Isolate* isolate, Zone* zone, RegExpCompileData* data, | 6627 Isolate* isolate, Zone* zone, RegExpCompileData* data, |
| 6641 JSRegExp::Flags flags, Handle<String> pattern, | 6628 JSRegExp::Flags flags, Handle<String> pattern, |
| 6642 Handle<String> sample_subject, bool is_one_byte) { | 6629 Handle<String> sample_subject, bool is_one_byte) { |
| 6643 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { | 6630 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
| 6644 return IrregexpRegExpTooBig(isolate); | 6631 return IrregexpRegExpTooBig(isolate); |
| 6645 } | 6632 } |
| 6633 bool ignore_case = flags & JSRegExp::kIgnoreCase; |
| 6646 bool is_sticky = flags & JSRegExp::kSticky; | 6634 bool is_sticky = flags & JSRegExp::kSticky; |
| 6647 bool is_global = flags & JSRegExp::kGlobal; | 6635 bool is_global = flags & JSRegExp::kGlobal; |
| 6648 bool is_unicode = flags & JSRegExp::kUnicode; | 6636 bool is_unicode = flags & JSRegExp::kUnicode; |
| 6649 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, | 6637 RegExpCompiler compiler(isolate, zone, data->capture_count, flags, |
| 6650 is_one_byte); | 6638 is_one_byte); |
| 6651 | 6639 |
| 6652 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); | 6640 if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern)); |
| 6653 | 6641 |
| 6654 // Sample some characters from the middle of the string. | 6642 // Sample some characters from the middle of the string. |
| 6655 static const int kSampleSize = 128; | 6643 static const int kSampleSize = 128; |
| (...skipping 29 matching lines...) Expand all Loading... |
| 6685 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); | 6673 ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone); |
| 6686 first_step_node->AddAlternative(GuardedAlternative(captured_body)); | 6674 first_step_node->AddAlternative(GuardedAlternative(captured_body)); |
| 6687 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( | 6675 first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode( |
| 6688 new (zone) RegExpCharacterClass('*'), false, loop_node))); | 6676 new (zone) RegExpCharacterClass('*'), false, loop_node))); |
| 6689 node = first_step_node; | 6677 node = first_step_node; |
| 6690 } else { | 6678 } else { |
| 6691 node = loop_node; | 6679 node = loop_node; |
| 6692 } | 6680 } |
| 6693 } | 6681 } |
| 6694 if (is_one_byte) { | 6682 if (is_one_byte) { |
| 6695 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler); | 6683 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| 6696 // Do it again to propagate the new nodes to places where they were not | 6684 // Do it again to propagate the new nodes to places where they were not |
| 6697 // put because they had not been calculated yet. | 6685 // put because they had not been calculated yet. |
| 6698 if (node != NULL) { | 6686 if (node != NULL) { |
| 6699 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler); | 6687 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| 6700 } | 6688 } |
| 6701 } else if (compiler.unicode() && (is_global || is_sticky)) { | 6689 } else if (compiler.unicode() && (is_global || is_sticky)) { |
| 6702 node = OptionallyStepBackToLeadSurrogate(&compiler, node); | 6690 node = OptionallyStepBackToLeadSurrogate(&compiler, node); |
| 6703 } | 6691 } |
| 6704 | 6692 |
| 6705 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); | 6693 if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone); |
| 6706 data->node = node; | 6694 data->node = node; |
| 6707 Analysis analysis(isolate, flags, is_one_byte); | 6695 Analysis analysis(isolate, flags, is_one_byte); |
| 6708 analysis.EnsureAnalyzed(node); | 6696 analysis.EnsureAnalyzed(node); |
| 6709 if (analysis.has_failed()) { | 6697 if (analysis.has_failed()) { |
| (...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6887 | 6875 |
| 6888 | 6876 |
| 6889 void RegExpResultsCache::Clear(FixedArray* cache) { | 6877 void RegExpResultsCache::Clear(FixedArray* cache) { |
| 6890 for (int i = 0; i < kRegExpResultsCacheSize; i++) { | 6878 for (int i = 0; i < kRegExpResultsCacheSize; i++) { |
| 6891 cache->set(i, Smi::FromInt(0)); | 6879 cache->set(i, Smi::FromInt(0)); |
| 6892 } | 6880 } |
| 6893 } | 6881 } |
| 6894 | 6882 |
| 6895 } // namespace internal | 6883 } // namespace internal |
| 6896 } // namespace v8 | 6884 } // namespace v8 |
| OLD | NEW |