Chromium Code Reviews| Index: src/regexp/jsregexp.cc |
| diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc |
| index b0a294bce16e17a100f46149dc5f57bb6cd99e0c..49422973d107ae555666956236bdafd1efb2888b 100644 |
| --- a/src/regexp/jsregexp.cc |
| +++ b/src/regexp/jsregexp.cc |
| @@ -1588,16 +1588,31 @@ void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
| // Returns the number of characters in the equivalence class, omitting those |
| // that cannot occur in the source string because it is Latin1. |
| -static int GetCaseIndependentLetters(Isolate* isolate, uc16 character, |
| - bool one_byte_subject, |
| - unibrow::uchar* letters) { |
| - int length = |
| - isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); |
| - // Unibrow returns 0 or 1 for characters where case independence is |
| - // trivial. |
| - if (length == 0) { |
| - letters[0] = character; |
| - length = 1; |
| +static int GetCaseIndependentLetters(RegExpCompiler* compiler, uc16 character, |
| + bool one_byte_subject, uc32* letters) { |
|
erikcorry
2016/02/01 15:21:40
The compiler knows whether we have a one-byte subj
Yang
2016/02/02 06:06:48
Done.
|
| + int length; |
| +#ifdef V8_I18N_SUPPORT |
| + if (compiler->unicode()) { |
| + USet* set = uset_open(character, character); |
| + uset_closeOver(set, USET_CASE_INSENSITIVE); |
| + uset_removeAllStrings(set); |
| + length = uset_size(set); |
| + for (int i = 0; i < length; i++) { |
| + letters[i] = uset_charAt(set, i); |
| + } |
| + uset_close(set); |
| + } else // NOLINT |
| + // Fallback in case ICU is not included. |
| +#endif // V8_I18N_SUPPORT |
| + { |
| + length = compiler->isolate()->jsregexp_uncanonicalize()->get(character, |
| + '\0', letters); |
| + // Unibrow returns 0 or 1 for characters where case independence is |
| + // trivial. |
| + if (length == 0) { |
| + letters[0] = character; |
| + length = 1; |
| + } |
| } |
| if (one_byte_subject) { |
| @@ -1613,14 +1628,9 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character, |
| return length; |
| } |
| - |
| -static inline bool EmitSimpleCharacter(Isolate* isolate, |
| - RegExpCompiler* compiler, |
| - uc16 c, |
| - Label* on_failure, |
| - int cp_offset, |
| - bool check, |
| - bool preloaded) { |
| +static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, uc16 c, |
| + Label* on_failure, int cp_offset, |
| + bool check, bool preloaded) { |
| RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| bool bound_checked = false; |
| if (!preloaded) { |
| @@ -1637,17 +1647,13 @@ static inline bool EmitSimpleCharacter(Isolate* isolate, |
| // Only emits non-letters (things that don't have case). Only used for case |
| // independent matches. |
| -static inline bool EmitAtomNonLetter(Isolate* isolate, |
| - RegExpCompiler* compiler, |
| - uc16 c, |
| - Label* on_failure, |
| - int cp_offset, |
| - bool check, |
| - bool preloaded) { |
| +static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, uc16 c, |
| + Label* on_failure, int cp_offset, |
| + bool check, bool preloaded) { |
| RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| bool one_byte = compiler->one_byte(); |
| unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| - int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); |
| + int length = GetCaseIndependentLetters(compiler, c, one_byte, chars); |
| if (length < 1) { |
| // This can't match. Must be an one-byte subject and a non-one-byte |
| // character. We do not need to do anything since the one-byte pass |
| @@ -1657,10 +1663,6 @@ static inline bool EmitAtomNonLetter(Isolate* isolate, |
| bool checked = false; |
| // We handle the length > 1 case in a later pass. |
| if (length == 1) { |
| - if (one_byte && c > String::kMaxOneByteCharCodeU) { |
|
erikcorry
2016/02/01 15:21:40
I think this is still worth having. For these cha
Yang
2016/02/02 06:06:48
I'm confused here. Wouldn't the FilterOneByte pass
|
| - // Can't match - see above. |
| - return false; // Bounds not checked. |
| - } |
| if (!preloaded) { |
| macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check); |
| checked = check; |
| @@ -1707,28 +1709,19 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
| return false; |
| } |
| - |
| -typedef bool EmitCharacterFunction(Isolate* isolate, |
| - RegExpCompiler* compiler, |
| - uc16 c, |
| - Label* on_failure, |
| - int cp_offset, |
| - bool check, |
| +typedef bool EmitCharacterFunction(RegExpCompiler* compiler, uc16 c, |
| + Label* on_failure, int cp_offset, bool check, |
| bool preloaded); |
| // Only emits letters (things that have case). Only used for case independent |
| // matches. |
| -static inline bool EmitAtomLetter(Isolate* isolate, |
| - RegExpCompiler* compiler, |
| - uc16 c, |
| - Label* on_failure, |
| - int cp_offset, |
| - bool check, |
| +static inline bool EmitAtomLetter(RegExpCompiler* compiler, uc16 c, |
| + Label* on_failure, int cp_offset, bool check, |
| bool preloaded) { |
| RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| bool one_byte = compiler->one_byte(); |
| unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| - int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); |
| + int length = GetCaseIndependentLetters(compiler, c, one_byte, chars); |
| if (length <= 1) return false; |
| // We may not need to check against the end of the input string |
| // if this character lies before a character that matched. |
| @@ -2277,13 +2270,12 @@ int ActionNode::EatsAtLeast(int still_to_find, |
| not_at_start); |
| } |
| - |
| -void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| +void ActionNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget, |
| BoyerMooreLookahead* bm, bool not_at_start) { |
| if (action_type_ == BEGIN_SUBMATCH) { |
| bm->SetRest(offset); |
| } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
| - on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
| + on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); |
| } |
| SaveBMInfo(bm, not_at_start, offset); |
| } |
| @@ -2304,12 +2296,12 @@ int AssertionNode::EatsAtLeast(int still_to_find, |
| not_at_start); |
| } |
| - |
| -void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| - BoyerMooreLookahead* bm, bool not_at_start) { |
| +void AssertionNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
| + int budget, BoyerMooreLookahead* bm, |
| + bool not_at_start) { |
| // Match the behaviour of EatsAtLeast on this node. |
| if (assertion_type() == AT_START && not_at_start) return; |
| - on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
| + on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); |
| SaveBMInfo(bm, not_at_start, offset); |
| } |
| @@ -2523,7 +2515,6 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| // Do not collect any quick check details if the text node reads backward, |
| // since it reads in the opposite direction than we use for quick checks. |
| if (read_backward()) return; |
| - Isolate* isolate = compiler->macro_assembler()->isolate(); |
| DCHECK(characters_filled_in < details->characters()); |
| int characters = details->characters(); |
| int char_mask; |
| @@ -2542,7 +2533,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| uc16 c = quarks[i]; |
| if (compiler->ignore_case()) { |
| unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| - int length = GetCaseIndependentLetters(isolate, c, |
| + int length = GetCaseIndependentLetters(compiler, c, |
| compiler->one_byte(), chars); |
| if (length == 0) { |
| // This can happen because all case variants are non-Latin1, but we |
| @@ -2748,18 +2739,17 @@ class VisitMarker { |
| NodeInfo* info_; |
| }; |
| - |
| -RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) { |
| +RegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
| if (info()->replacement_calculated) return replacement(); |
| if (depth < 0) return this; |
| DCHECK(!info()->visited); |
| VisitMarker marker(info()); |
| - return FilterSuccessor(depth - 1, ignore_case); |
| + return FilterSuccessor(depth - 1, compiler); |
| } |
| - |
| -RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { |
| - RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); |
| +RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, |
| + RegExpCompiler* compiler) { |
| + RegExpNode* next = on_success_->FilterOneByte(depth - 1, compiler); |
| if (next == NULL) return set_replacement(NULL); |
| on_success_ = next; |
| return set_replacement(this); |
| @@ -2782,8 +2772,30 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { |
| return false; |
| } |
| +static uc16 ConvertNonLatin1ToEquivalentLatin1(bool unicode, uc16 c) { |
| +#ifdef V8_I18N_SUPPORT |
| + if (unicode) { |
| + USet* set = uset_open(c, c); |
| + uset_closeOver(set, USET_CASE_INSENSITIVE); |
| + uset_removeAllStrings(set); |
| + int length = uset_size(set); |
| + uc16 result = 0; |
| + for (int i = 0; i < length; i++) { |
| + uc32 c = uset_charAt(set, i); |
| + if (c <= String::kMaxOneByteCharCode) { |
| + result = static_cast<uc16>(c); |
| + break; |
| + } |
| + } |
| + uset_close(set); |
| + return result; |
| + } |
| + // Fallback to unibrow if ICU is not included. |
| +#endif // V8_I18N_SUPPORT |
| + return unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
| +} |
| -RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { |
| +RegExpNode* TextNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
| if (info()->replacement_calculated) return replacement(); |
| if (depth < 0) return this; |
| DCHECK(!info()->visited); |
| @@ -2794,16 +2806,17 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { |
| if (elm.text_type() == TextElement::ATOM) { |
| Vector<const uc16> quarks = elm.atom()->data(); |
| for (int j = 0; j < quarks.length(); j++) { |
| - uint16_t c = quarks[j]; |
| + uc16 c = quarks[j]; |
| if (c <= String::kMaxOneByteCharCode) continue; |
| - if (!ignore_case) return set_replacement(NULL); |
| + if (!compiler->ignore_case()) return set_replacement(NULL); |
| // Here, we need to check for characters whose upper and lower cases |
| // are outside the Latin-1 range. |
| - uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
| + uc16 converted = |
| + ConvertNonLatin1ToEquivalentLatin1(compiler->unicode(), c); |
| // Character is outside Latin-1 completely |
| if (converted == 0) return set_replacement(NULL); |
| // Convert quark to Latin-1 in place. |
| - uint16_t* copy = const_cast<uint16_t*>(quarks.start()); |
| + uc16* copy = const_cast<uc16*>(quarks.start()); |
| copy[j] = converted; |
| } |
| } else { |
| @@ -2818,24 +2831,25 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { |
| ranges->at(0).from() == 0 && |
| ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
| // This will be handled in a later filter. |
| - if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
| + if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges)) |
| + continue; |
| return set_replacement(NULL); |
| } |
| } else { |
| if (range_count == 0 || |
| ranges->at(0).from() > String::kMaxOneByteCharCode) { |
| // This will be handled in a later filter. |
| - if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
| + if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges)) |
| + continue; |
| return set_replacement(NULL); |
| } |
| } |
| } |
| } |
| - return FilterSuccessor(depth - 1, ignore_case); |
| + return FilterSuccessor(depth - 1, compiler); |
| } |
| - |
| -RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
| +RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
| if (info()->replacement_calculated) return replacement(); |
| if (depth < 0) return this; |
| if (info()->visited) return this; |
| @@ -2843,17 +2857,16 @@ RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
| VisitMarker marker(info()); |
| RegExpNode* continue_replacement = |
| - continue_node_->FilterOneByte(depth - 1, ignore_case); |
| + continue_node_->FilterOneByte(depth - 1, compiler); |
| // If we can't continue after the loop then there is no sense in doing the |
| // loop. |
| if (continue_replacement == NULL) return set_replacement(NULL); |
| } |
| - return ChoiceNode::FilterOneByte(depth - 1, ignore_case); |
| + return ChoiceNode::FilterOneByte(depth - 1, compiler); |
| } |
| - |
| -RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
| +RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
| if (info()->replacement_calculated) return replacement(); |
| if (depth < 0) return this; |
| if (info()->visited) return this; |
| @@ -2873,7 +2886,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
| for (int i = 0; i < choice_count; i++) { |
| GuardedAlternative alternative = alternatives_->at(i); |
| RegExpNode* replacement = |
| - alternative.node()->FilterOneByte(depth - 1, ignore_case); |
| + alternative.node()->FilterOneByte(depth - 1, compiler); |
| DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. |
| if (replacement != NULL) { |
| alternatives_->at(i).set_node(replacement); |
| @@ -2893,7 +2906,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
| new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); |
| for (int i = 0; i < choice_count; i++) { |
| RegExpNode* replacement = |
| - alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case); |
| + alternatives_->at(i).node()->FilterOneByte(depth - 1, compiler); |
| if (replacement != NULL) { |
| alternatives_->at(i).set_node(replacement); |
| new_alternatives->Add(alternatives_->at(i), zone()); |
| @@ -2903,9 +2916,8 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
| return this; |
| } |
| - |
| -RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth, |
| - bool ignore_case) { |
| +RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte( |
| + int depth, RegExpCompiler* compiler) { |
| if (info()->replacement_calculated) return replacement(); |
| if (depth < 0) return this; |
| if (info()->visited) return this; |
| @@ -2913,12 +2925,12 @@ RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth, |
| // Alternative 0 is the negative lookahead, alternative 1 is what comes |
| // afterwards. |
| RegExpNode* node = alternatives_->at(1).node(); |
| - RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); |
| + RegExpNode* replacement = node->FilterOneByte(depth - 1, compiler); |
| if (replacement == NULL) return set_replacement(NULL); |
| alternatives_->at(1).set_node(replacement); |
| RegExpNode* neg_node = alternatives_->at(0).node(); |
| - RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); |
| + RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, compiler); |
| // If the negative lookahead is always going to fail then |
| // we don't need to check it. |
| if (neg_replacement == NULL) return set_replacement(replacement); |
| @@ -2939,15 +2951,15 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
| not_at_start); |
| } |
| - |
| -void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| - BoyerMooreLookahead* bm, bool not_at_start) { |
| +void LoopChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
| + int budget, BoyerMooreLookahead* bm, |
| + bool not_at_start) { |
| if (body_can_be_zero_length_ || budget <= 0) { |
| bm->SetRest(offset); |
| SaveBMInfo(bm, not_at_start, offset); |
| return; |
| } |
| - ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
| + ChoiceNode::FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); |
| SaveBMInfo(bm, not_at_start, offset); |
| } |
| @@ -3039,7 +3051,6 @@ static void EmitHat(RegExpCompiler* compiler, |
| // Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
| void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
| RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| - Isolate* isolate = assembler->isolate(); |
| Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
| bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
| BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
| @@ -3051,7 +3062,7 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
| if (eats_at_least >= 1) { |
| BoyerMooreLookahead* bm = |
| new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); |
| - FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start); |
| + FillInBMInfo(compiler, 0, kRecursionBudget, bm, not_at_start); |
| if (bm->at(0)->is_non_word()) |
| next_is_word_character = Trace::FALSE_VALUE; |
| if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
| @@ -3223,7 +3234,6 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
| bool first_element_checked, |
| int* checked_up_to) { |
| RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
| - Isolate* isolate = assembler->isolate(); |
| bool one_byte = compiler->one_byte(); |
| Label* backtrack = trace->backtrack(); |
| QuickCheckDetails* quick_check = trace->quick_check_performed(); |
| @@ -3241,7 +3251,11 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
| switch (pass) { |
| case NON_LATIN1_MATCH: |
| DCHECK(one_byte); |
| - if (quarks[j] > String::kMaxOneByteCharCode) { |
| + if (quarks[j] > String::kMaxOneByteCharCode && |
| + !(compiler->unicode() && compiler->ignore_case())) { |
| + // In the non-unicode case, if the pattern is non-Latin1, it |
|
erikcorry
2016/02/01 15:21:40
I think you mean in the one-byte case (non-unicode
Yang
2016/02/02 06:06:48
I did mean this. With /i, if a character is outsid
|
| + // cannot possibly match a Latin1 character, unless we have the |
| + // /ui flags, e.g. /\u212b/ui matches "\u00c5". |
| assembler->GoTo(backtrack); |
| return; |
| } |
| @@ -3261,8 +3275,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
| if (emit_function != NULL) { |
| bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); |
| bool bound_checked = |
| - emit_function(isolate, compiler, quarks[j], backtrack, |
| - cp_offset + j, bounds_check, preloaded); |
| + emit_function(compiler, quarks[j], backtrack, cp_offset + j, |
| + bounds_check, preloaded); |
| if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
| } |
| } |
| @@ -4096,7 +4110,6 @@ int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, |
| DCHECK(trace->is_trivial()); |
| RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
| - Isolate* isolate = macro_assembler->isolate(); |
| // At this point we know that we are at a non-greedy loop that will eat |
| // any character one at a time. Any non-anchored regexp has such a |
| // loop prepended to it in order to find where it starts. We look for |
| @@ -4115,7 +4128,7 @@ int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, |
| compiler, |
| zone()); |
| GuardedAlternative alt0 = alternatives_->at(0); |
| - alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false); |
| + alt0.node()->FillInBMInfo(compiler, 0, kRecursionBudget, bm, false); |
| } |
| } |
| if (bm != NULL) { |
| @@ -6372,9 +6385,8 @@ void Analysis::VisitAssertion(AssertionNode* that) { |
| EnsureAnalyzed(that->on_success()); |
| } |
| - |
| -void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| - BoyerMooreLookahead* bm, |
| +void BackReferenceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
| + int budget, BoyerMooreLookahead* bm, |
| bool not_at_start) { |
| // Working out the set of characters that a backreference can match is too |
| // hard, so we just say that any character can match. |
| @@ -6386,8 +6398,7 @@ void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == |
| RegExpMacroAssembler::kTableSize); |
| - |
| -void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| +void ChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget, |
| BoyerMooreLookahead* bm, bool not_at_start) { |
| ZoneList<GuardedAlternative>* alts = alternatives(); |
| budget = (budget - 1) / alts->length(); |
| @@ -6398,14 +6409,14 @@ void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
| SaveBMInfo(bm, not_at_start, offset); |
| return; |
| } |
| - alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start); |
| + alt.node()->FillInBMInfo(compiler, offset, budget, bm, not_at_start); |
| } |
| SaveBMInfo(bm, not_at_start, offset); |
| } |
| - |
| -void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget, |
| - BoyerMooreLookahead* bm, bool not_at_start) { |
| +void TextNode::FillInBMInfo(RegExpCompiler* compiler, int initial_offset, |
| + int budget, BoyerMooreLookahead* bm, |
| + bool not_at_start) { |
| if (initial_offset >= bm->length()) return; |
| int offset = initial_offset; |
| int max_char = bm->max_char(); |
| @@ -6426,8 +6437,8 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget, |
| if (bm->compiler()->ignore_case()) { |
| unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
| int length = GetCaseIndependentLetters( |
| - isolate, character, bm->max_char() == String::kMaxOneByteCharCode, |
| - chars); |
| + compiler, character, |
| + bm->max_char() == String::kMaxOneByteCharCode, chars); |
| for (int j = 0; j < length; j++) { |
| bm->Set(offset, chars[j]); |
| } |
| @@ -6456,7 +6467,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget, |
| if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| return; |
| } |
| - on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, |
| + on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, |
| true); // Not at start after a text node. |
| if (initial_offset == 0) set_bm_info(not_at_start, bm); |
| } |
| @@ -6614,7 +6625,6 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
| if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
| return IrregexpRegExpTooBig(isolate); |
| } |
| - bool ignore_case = flags & JSRegExp::kIgnoreCase; |
| bool is_sticky = flags & JSRegExp::kSticky; |
| bool is_global = flags & JSRegExp::kGlobal; |
| RegExpCompiler compiler(isolate, zone, data->capture_count, flags, |
| @@ -6663,11 +6673,11 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
| } |
| } |
| if (is_one_byte) { |
| - node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| + node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler); |
| // Do it again to propagate the new nodes to places where they were not |
| // put because they had not been calculated yet. |
| if (node != NULL) { |
| - node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
| + node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler); |
| } |
| } else if (compiler.unicode() && (is_global || is_sticky)) { |
| node = OptionallyStepBackToLeadSurrogate(&compiler, node); |