Index: src/regexp/jsregexp.cc |
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc |
index 654afa9a89e06495f4a070c439004c074f6f12e0..7b510b072b0bba0d6e66726397213c52ead70a9f 100644 |
--- a/src/regexp/jsregexp.cc |
+++ b/src/regexp/jsregexp.cc |
@@ -1598,34 +1598,19 @@ void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, |
// Returns the number of characters in the equivalence class, omitting those |
// that cannot occur in the source string because it is Latin1. |
-static int GetCaseIndependentLetters(RegExpCompiler* compiler, uc16 character, |
- uc32* letters) { |
- int length; |
-#ifdef V8_I18N_SUPPORT |
- if (compiler->unicode()) { |
- USet* set = uset_open(character, character); |
- uset_closeOver(set, USET_CASE_INSENSITIVE); |
- uset_removeAllStrings(set); |
- length = uset_size(set); |
- for (int i = 0; i < length; i++) { |
- letters[i] = uset_charAt(set, i); |
- } |
- uset_close(set); |
- } else // NOLINT |
-// Fallback in case ICU is not included. |
-#endif // V8_I18N_SUPPORT |
- { |
- length = compiler->isolate()->jsregexp_uncanonicalize()->get(character, |
- '\0', letters); |
- // Unibrow returns 0 or 1 for characters where case independence is |
- // trivial. |
- if (length == 0) { |
- letters[0] = character; |
- length = 1; |
- } |
- } |
- |
- if (compiler->one_byte()) { |
+static int GetCaseIndependentLetters(Isolate* isolate, uc16 character, |
+ bool one_byte_subject, |
+ unibrow::uchar* letters) { |
+ int length = |
+ isolate->jsregexp_uncanonicalize()->get(character, '\0', letters); |
+ // Unibrow returns 0 or 1 for characters where case independence is |
+ // trivial. |
+ if (length == 0) { |
+ letters[0] = character; |
+ length = 1; |
+ } |
+ |
+ if (one_byte_subject) { |
int new_length = 0; |
for (int i = 0; i < length; i++) { |
if (letters[i] <= String::kMaxOneByteCharCode) { |
@@ -1638,9 +1623,14 @@ static int GetCaseIndependentLetters(RegExpCompiler* compiler, uc16 character, |
return length; |
} |
-static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, uc16 c, |
- Label* on_failure, int cp_offset, |
- bool check, bool preloaded) { |
+ |
+static inline bool EmitSimpleCharacter(Isolate* isolate, |
+ RegExpCompiler* compiler, |
+ uc16 c, |
+ Label* on_failure, |
+ int cp_offset, |
+ bool check, |
+ bool preloaded) { |
RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
bool bound_checked = false; |
if (!preloaded) { |
@@ -1657,12 +1647,17 @@ static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, uc16 c, |
// Only emits non-letters (things that don't have case). Only used for case |
// independent matches. |
-static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, uc16 c, |
- Label* on_failure, int cp_offset, |
- bool check, bool preloaded) { |
+static inline bool EmitAtomNonLetter(Isolate* isolate, |
+ RegExpCompiler* compiler, |
+ uc16 c, |
+ Label* on_failure, |
+ int cp_offset, |
+ bool check, |
+ bool preloaded) { |
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
+ bool one_byte = compiler->one_byte(); |
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
- int length = GetCaseIndependentLetters(compiler, c, chars); |
+ int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); |
if (length < 1) { |
// This can't match. Must be an one-byte subject and a non-one-byte |
// character. We do not need to do anything since the one-byte pass |
@@ -1672,8 +1667,8 @@ static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, uc16 c, |
bool checked = false; |
// We handle the length > 1 case in a later pass. |
if (length == 1) { |
- if (compiler->one_byte() && c > String::kMaxOneByteCharCodeU) { |
- // This cannot match. |
+ if (one_byte && c > String::kMaxOneByteCharCodeU) { |
+ // Can't match - see above. |
return false; // Bounds not checked. |
} |
if (!preloaded) { |
@@ -1722,18 +1717,28 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler, |
return false; |
} |
-typedef bool EmitCharacterFunction(RegExpCompiler* compiler, uc16 c, |
- Label* on_failure, int cp_offset, bool check, |
+ |
+typedef bool EmitCharacterFunction(Isolate* isolate, |
+ RegExpCompiler* compiler, |
+ uc16 c, |
+ Label* on_failure, |
+ int cp_offset, |
+ bool check, |
bool preloaded); |
// Only emits letters (things that have case). Only used for case independent |
// matches. |
-static inline bool EmitAtomLetter(RegExpCompiler* compiler, uc16 c, |
- Label* on_failure, int cp_offset, bool check, |
+static inline bool EmitAtomLetter(Isolate* isolate, |
+ RegExpCompiler* compiler, |
+ uc16 c, |
+ Label* on_failure, |
+ int cp_offset, |
+ bool check, |
bool preloaded) { |
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
+ bool one_byte = compiler->one_byte(); |
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
- int length = GetCaseIndependentLetters(compiler, c, chars); |
+ int length = GetCaseIndependentLetters(isolate, c, one_byte, chars); |
if (length <= 1) return false; |
// We may not need to check against the end of the input string |
// if this character lies before a character that matched. |
@@ -1744,8 +1749,8 @@ static inline bool EmitAtomLetter(RegExpCompiler* compiler, uc16 c, |
DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); |
switch (length) { |
case 2: { |
- if (ShortCutEmitCharacterPair(macro_assembler, compiler->one_byte(), |
- chars[0], chars[1], on_failure)) { |
+ if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0], |
+ chars[1], on_failure)) { |
} else { |
macro_assembler->CheckCharacter(chars[0], &ok); |
macro_assembler->CheckNotCharacter(chars[1], on_failure); |
@@ -2282,12 +2287,13 @@ int ActionNode::EatsAtLeast(int still_to_find, |
not_at_start); |
} |
-void ActionNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget, |
+ |
+void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
BoyerMooreLookahead* bm, bool not_at_start) { |
if (action_type_ == BEGIN_SUBMATCH) { |
bm->SetRest(offset); |
} else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) { |
- on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); |
+ on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
} |
SaveBMInfo(bm, not_at_start, offset); |
} |
@@ -2308,12 +2314,12 @@ int AssertionNode::EatsAtLeast(int still_to_find, |
not_at_start); |
} |
-void AssertionNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
- int budget, BoyerMooreLookahead* bm, |
- bool not_at_start) { |
+ |
+void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
+ BoyerMooreLookahead* bm, bool not_at_start) { |
// Match the behaviour of EatsAtLeast on this node. |
if (assertion_type() == AT_START && not_at_start) return; |
- on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); |
+ on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
SaveBMInfo(bm, not_at_start, offset); |
} |
@@ -2527,6 +2533,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
// Do not collect any quick check details if the text node reads backward, |
// since it reads in the opposite direction than we use for quick checks. |
if (read_backward()) return; |
+ Isolate* isolate = compiler->macro_assembler()->isolate(); |
DCHECK(characters_filled_in < details->characters()); |
int characters = details->characters(); |
int char_mask; |
@@ -2545,7 +2552,8 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, |
uc16 c = quarks[i]; |
if (compiler->ignore_case()) { |
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
- int length = GetCaseIndependentLetters(compiler, c, chars); |
+ int length = GetCaseIndependentLetters(isolate, c, |
+ compiler->one_byte(), chars); |
if (length == 0) { |
// This can happen because all case variants are non-Latin1, but we |
// know the input is Latin1. |
@@ -2750,17 +2758,18 @@ class VisitMarker { |
NodeInfo* info_; |
}; |
-RegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
+ |
+RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) { |
if (info()->replacement_calculated) return replacement(); |
if (depth < 0) return this; |
DCHECK(!info()->visited); |
VisitMarker marker(info()); |
- return FilterSuccessor(depth - 1, compiler); |
+ return FilterSuccessor(depth - 1, ignore_case); |
} |
-RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, |
- RegExpCompiler* compiler) { |
- RegExpNode* next = on_success_->FilterOneByte(depth - 1, compiler); |
+ |
+RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) { |
+ RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case); |
if (next == NULL) return set_replacement(NULL); |
on_success_ = next; |
return set_replacement(this); |
@@ -2783,30 +2792,8 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) { |
return false; |
} |
-static uc16 ConvertNonLatin1ToEquivalentLatin1(bool unicode, uc16 c) { |
-#ifdef V8_I18N_SUPPORT |
- if (unicode) { |
- USet* set = uset_open(c, c); |
- uset_closeOver(set, USET_CASE_INSENSITIVE); |
- uset_removeAllStrings(set); |
- int length = uset_size(set); |
- uc16 result = 0; |
- for (int i = 0; i < length; i++) { |
- uc32 c = uset_charAt(set, i); |
- if (c <= String::kMaxOneByteCharCode) { |
- result = static_cast<uc16>(c); |
- break; |
- } |
- } |
- uset_close(set); |
- return result; |
- } |
-// Fallback to unibrow if ICU is not included. |
-#endif // V8_I18N_SUPPORT |
- return unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
-} |
-RegExpNode* TextNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
+RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) { |
if (info()->replacement_calculated) return replacement(); |
if (depth < 0) return this; |
DCHECK(!info()->visited); |
@@ -2817,17 +2804,16 @@ RegExpNode* TextNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
if (elm.text_type() == TextElement::ATOM) { |
Vector<const uc16> quarks = elm.atom()->data(); |
for (int j = 0; j < quarks.length(); j++) { |
- uc16 c = quarks[j]; |
+ uint16_t c = quarks[j]; |
if (c <= String::kMaxOneByteCharCode) continue; |
- if (!compiler->ignore_case()) return set_replacement(NULL); |
+ if (!ignore_case) return set_replacement(NULL); |
// Here, we need to check for characters whose upper and lower cases |
// are outside the Latin-1 range. |
- uc16 converted = |
- ConvertNonLatin1ToEquivalentLatin1(compiler->unicode(), c); |
+ uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c); |
// Character is outside Latin-1 completely |
if (converted == 0) return set_replacement(NULL); |
// Convert quark to Latin-1 in place. |
- uc16* copy = const_cast<uc16*>(quarks.start()); |
+ uint16_t* copy = const_cast<uint16_t*>(quarks.start()); |
copy[j] = converted; |
} |
} else { |
@@ -2842,25 +2828,24 @@ RegExpNode* TextNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
ranges->at(0).from() == 0 && |
ranges->at(0).to() >= String::kMaxOneByteCharCode) { |
// This will be handled in a later filter. |
- if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges)) |
- continue; |
+ if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
return set_replacement(NULL); |
} |
} else { |
if (range_count == 0 || |
ranges->at(0).from() > String::kMaxOneByteCharCode) { |
// This will be handled in a later filter. |
- if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges)) |
- continue; |
+ if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue; |
return set_replacement(NULL); |
} |
} |
} |
} |
- return FilterSuccessor(depth - 1, compiler); |
+ return FilterSuccessor(depth - 1, ignore_case); |
} |
-RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
+ |
+RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
if (info()->replacement_calculated) return replacement(); |
if (depth < 0) return this; |
if (info()->visited) return this; |
@@ -2868,16 +2853,17 @@ RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
VisitMarker marker(info()); |
RegExpNode* continue_replacement = |
- continue_node_->FilterOneByte(depth - 1, compiler); |
+ continue_node_->FilterOneByte(depth - 1, ignore_case); |
// If we can't continue after the loop then there is no sense in doing the |
// loop. |
if (continue_replacement == NULL) return set_replacement(NULL); |
} |
- return ChoiceNode::FilterOneByte(depth - 1, compiler); |
+ return ChoiceNode::FilterOneByte(depth - 1, ignore_case); |
} |
-RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
+ |
+RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) { |
if (info()->replacement_calculated) return replacement(); |
if (depth < 0) return this; |
if (info()->visited) return this; |
@@ -2897,7 +2883,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
for (int i = 0; i < choice_count; i++) { |
GuardedAlternative alternative = alternatives_->at(i); |
RegExpNode* replacement = |
- alternative.node()->FilterOneByte(depth - 1, compiler); |
+ alternative.node()->FilterOneByte(depth - 1, ignore_case); |
DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK. |
if (replacement != NULL) { |
alternatives_->at(i).set_node(replacement); |
@@ -2917,7 +2903,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
new(zone()) ZoneList<GuardedAlternative>(surviving, zone()); |
for (int i = 0; i < choice_count; i++) { |
RegExpNode* replacement = |
- alternatives_->at(i).node()->FilterOneByte(depth - 1, compiler); |
+ alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case); |
if (replacement != NULL) { |
alternatives_->at(i).set_node(replacement); |
new_alternatives->Add(alternatives_->at(i), zone()); |
@@ -2927,8 +2913,9 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) { |
return this; |
} |
-RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte( |
- int depth, RegExpCompiler* compiler) { |
+ |
+RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth, |
+ bool ignore_case) { |
if (info()->replacement_calculated) return replacement(); |
if (depth < 0) return this; |
if (info()->visited) return this; |
@@ -2936,12 +2923,12 @@ RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte( |
// Alternative 0 is the negative lookahead, alternative 1 is what comes |
// afterwards. |
RegExpNode* node = alternatives_->at(1).node(); |
- RegExpNode* replacement = node->FilterOneByte(depth - 1, compiler); |
+ RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case); |
if (replacement == NULL) return set_replacement(NULL); |
alternatives_->at(1).set_node(replacement); |
RegExpNode* neg_node = alternatives_->at(0).node(); |
- RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, compiler); |
+ RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case); |
// If the negative lookahead is always going to fail then |
// we don't need to check it. |
if (neg_replacement == NULL) return set_replacement(replacement); |
@@ -2962,15 +2949,15 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, |
not_at_start); |
} |
-void LoopChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
- int budget, BoyerMooreLookahead* bm, |
- bool not_at_start) { |
+ |
+void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
+ BoyerMooreLookahead* bm, bool not_at_start) { |
if (body_can_be_zero_length_ || budget <= 0) { |
bm->SetRest(offset); |
SaveBMInfo(bm, not_at_start, offset); |
return; |
} |
- ChoiceNode::FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start); |
+ ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start); |
SaveBMInfo(bm, not_at_start, offset); |
} |
@@ -3062,6 +3049,7 @@ static void EmitHat(RegExpCompiler* compiler, |
// Emit the code to handle \b and \B (word-boundary or non-word-boundary). |
void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
+ Isolate* isolate = assembler->isolate(); |
Trace::TriBool next_is_word_character = Trace::UNKNOWN; |
bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE); |
BoyerMooreLookahead* lookahead = bm_info(not_at_start); |
@@ -3073,7 +3061,7 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { |
if (eats_at_least >= 1) { |
BoyerMooreLookahead* bm = |
new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone()); |
- FillInBMInfo(compiler, 0, kRecursionBudget, bm, not_at_start); |
+ FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start); |
if (bm->at(0)->is_non_word()) |
next_is_word_character = Trace::FALSE_VALUE; |
if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE; |
@@ -3245,6 +3233,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
bool first_element_checked, |
int* checked_up_to) { |
RegExpMacroAssembler* assembler = compiler->macro_assembler(); |
+ Isolate* isolate = assembler->isolate(); |
bool one_byte = compiler->one_byte(); |
Label* backtrack = trace->backtrack(); |
QuickCheckDetails* quick_check = trace->quick_check_performed(); |
@@ -3262,7 +3251,6 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
switch (pass) { |
case NON_LATIN1_MATCH: |
DCHECK(one_byte); |
- DCHECK(!(compiler->unicode() && compiler->ignore_case())); |
if (quarks[j] > String::kMaxOneByteCharCode) { |
assembler->GoTo(backtrack); |
return; |
@@ -3283,8 +3271,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, |
if (emit_function != NULL) { |
bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); |
bool bound_checked = |
- emit_function(compiler, quarks[j], backtrack, cp_offset + j, |
- bounds_check, preloaded); |
+ emit_function(isolate, compiler, quarks[j], backtrack, |
+ cp_offset + j, bounds_check, preloaded); |
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); |
} |
} |
@@ -3367,13 +3355,7 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) { |
return; |
} |
- if (compiler->one_byte() && |
- !(compiler->unicode() && compiler->ignore_case())) { |
- // If any character within the text node is outside the Latin1 range, it |
- // cannot possibly match anything in a one-byte string. This still holds |
- // for case-insensitive non-unicode regexp patterns. However, for |
- // case-insensitive unicode regexp patterns, this is no longer true, e.g. |
- // /\u212b/ui matches "\u00c5". |
+ if (compiler->one_byte()) { |
int dummy = 0; |
TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy); |
} |
@@ -4125,6 +4107,7 @@ int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, |
DCHECK(trace->is_trivial()); |
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); |
+ Isolate* isolate = macro_assembler->isolate(); |
// At this point we know that we are at a non-greedy loop that will eat |
// any character one at a time. Any non-anchored regexp has such a |
// loop prepended to it in order to find where it starts. We look for |
@@ -4143,7 +4126,7 @@ int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, |
compiler, |
zone()); |
GuardedAlternative alt0 = alternatives_->at(0); |
- alt0.node()->FillInBMInfo(compiler, 0, kRecursionBudget, bm, false); |
+ alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false); |
} |
} |
if (bm != NULL) { |
@@ -6405,8 +6388,9 @@ void Analysis::VisitAssertion(AssertionNode* that) { |
EnsureAnalyzed(that->on_success()); |
} |
-void BackReferenceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
- int budget, BoyerMooreLookahead* bm, |
+ |
+void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
+ BoyerMooreLookahead* bm, |
bool not_at_start) { |
// Working out the set of characters that a backreference can match is too |
// hard, so we just say that any character can match. |
@@ -6418,7 +6402,8 @@ void BackReferenceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, |
STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == |
RegExpMacroAssembler::kTableSize); |
-void ChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget, |
+ |
+void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget, |
BoyerMooreLookahead* bm, bool not_at_start) { |
ZoneList<GuardedAlternative>* alts = alternatives(); |
budget = (budget - 1) / alts->length(); |
@@ -6429,14 +6414,14 @@ void ChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget, |
SaveBMInfo(bm, not_at_start, offset); |
return; |
} |
- alt.node()->FillInBMInfo(compiler, offset, budget, bm, not_at_start); |
+ alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start); |
} |
SaveBMInfo(bm, not_at_start, offset); |
} |
-void TextNode::FillInBMInfo(RegExpCompiler* compiler, int initial_offset, |
- int budget, BoyerMooreLookahead* bm, |
- bool not_at_start) { |
+ |
+void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget, |
+ BoyerMooreLookahead* bm, bool not_at_start) { |
if (initial_offset >= bm->length()) return; |
int offset = initial_offset; |
int max_char = bm->max_char(); |
@@ -6456,7 +6441,9 @@ void TextNode::FillInBMInfo(RegExpCompiler* compiler, int initial_offset, |
uc16 character = atom->data()[j]; |
if (bm->compiler()->ignore_case()) { |
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; |
- int length = GetCaseIndependentLetters(compiler, character, chars); |
+ int length = GetCaseIndependentLetters( |
+ isolate, character, bm->max_char() == String::kMaxOneByteCharCode, |
+ chars); |
for (int j = 0; j < length; j++) { |
bm->Set(offset, chars[j]); |
} |
@@ -6485,7 +6472,7 @@ void TextNode::FillInBMInfo(RegExpCompiler* compiler, int initial_offset, |
if (initial_offset == 0) set_bm_info(not_at_start, bm); |
return; |
} |
- on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, |
+ on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, |
true); // Not at start after a text node. |
if (initial_offset == 0) set_bm_info(not_at_start, bm); |
} |
@@ -6643,6 +6630,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { |
return IrregexpRegExpTooBig(isolate); |
} |
+ bool ignore_case = flags & JSRegExp::kIgnoreCase; |
bool is_sticky = flags & JSRegExp::kSticky; |
bool is_global = flags & JSRegExp::kGlobal; |
bool is_unicode = flags & JSRegExp::kUnicode; |
@@ -6692,11 +6680,11 @@ RegExpEngine::CompilationResult RegExpEngine::Compile( |
} |
} |
if (is_one_byte) { |
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler); |
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
// Do it again to propagate the new nodes to places where they were not |
// put because they had not been calculated yet. |
if (node != NULL) { |
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler); |
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case); |
} |
} else if (compiler.unicode() && (is_global || is_sticky)) { |
node = OptionallyStepBackToLeadSurrogate(&compiler, node); |