Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(110)

Unified Diff: src/regexp/jsregexp.cc

Issue 1641613002: [regexp] implement /ui to mirror the implementation for /i. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: addressed comment Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/regexp/jsregexp.cc
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index 7b510b072b0bba0d6e66726397213c52ead70a9f..654afa9a89e06495f4a070c439004c074f6f12e0 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -1598,19 +1598,34 @@ void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
// Returns the number of characters in the equivalence class, omitting those
// that cannot occur in the source string because it is Latin1.
-static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
- bool one_byte_subject,
- unibrow::uchar* letters) {
- int length =
- isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
- // Unibrow returns 0 or 1 for characters where case independence is
- // trivial.
- if (length == 0) {
- letters[0] = character;
- length = 1;
- }
-
- if (one_byte_subject) {
+static int GetCaseIndependentLetters(RegExpCompiler* compiler, uc16 character,
+ uc32* letters) {
+ int length;
+#ifdef V8_I18N_SUPPORT
+ if (compiler->unicode()) {
+ USet* set = uset_open(character, character);
+ uset_closeOver(set, USET_CASE_INSENSITIVE);
+ uset_removeAllStrings(set);
+ length = uset_size(set);
+ for (int i = 0; i < length; i++) {
+ letters[i] = uset_charAt(set, i);
+ }
+ uset_close(set);
+ } else // NOLINT
+// Fallback in case ICU is not included.
+#endif // V8_I18N_SUPPORT
+ {
+ length = compiler->isolate()->jsregexp_uncanonicalize()->get(character,
+ '\0', letters);
+ // Unibrow returns 0 or 1 for characters where case independence is
+ // trivial.
+ if (length == 0) {
+ letters[0] = character;
+ length = 1;
+ }
+ }
+
+ if (compiler->one_byte()) {
int new_length = 0;
for (int i = 0; i < length; i++) {
if (letters[i] <= String::kMaxOneByteCharCode) {
@@ -1623,14 +1638,9 @@ static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
return length;
}
-
-static inline bool EmitSimpleCharacter(Isolate* isolate,
- RegExpCompiler* compiler,
- uc16 c,
- Label* on_failure,
- int cp_offset,
- bool check,
- bool preloaded) {
+static inline bool EmitSimpleCharacter(RegExpCompiler* compiler, uc16 c,
+ Label* on_failure, int cp_offset,
+ bool check, bool preloaded) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
bool bound_checked = false;
if (!preloaded) {
@@ -1647,17 +1657,12 @@ static inline bool EmitSimpleCharacter(Isolate* isolate,
// Only emits non-letters (things that don't have case). Only used for case
// independent matches.
-static inline bool EmitAtomNonLetter(Isolate* isolate,
- RegExpCompiler* compiler,
- uc16 c,
- Label* on_failure,
- int cp_offset,
- bool check,
- bool preloaded) {
+static inline bool EmitAtomNonLetter(RegExpCompiler* compiler, uc16 c,
+ Label* on_failure, int cp_offset,
+ bool check, bool preloaded) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- bool one_byte = compiler->one_byte();
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
+ int length = GetCaseIndependentLetters(compiler, c, chars);
if (length < 1) {
// This can't match. Must be an one-byte subject and a non-one-byte
// character. We do not need to do anything since the one-byte pass
@@ -1667,8 +1672,8 @@ static inline bool EmitAtomNonLetter(Isolate* isolate,
bool checked = false;
// We handle the length > 1 case in a later pass.
if (length == 1) {
- if (one_byte && c > String::kMaxOneByteCharCodeU) {
- // Can't match - see above.
+ if (compiler->one_byte() && c > String::kMaxOneByteCharCodeU) {
+ // This cannot match.
return false; // Bounds not checked.
}
if (!preloaded) {
@@ -1717,28 +1722,18 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
return false;
}
-
-typedef bool EmitCharacterFunction(Isolate* isolate,
- RegExpCompiler* compiler,
- uc16 c,
- Label* on_failure,
- int cp_offset,
- bool check,
+typedef bool EmitCharacterFunction(RegExpCompiler* compiler, uc16 c,
+ Label* on_failure, int cp_offset, bool check,
bool preloaded);
// Only emits letters (things that have case). Only used for case independent
// matches.
-static inline bool EmitAtomLetter(Isolate* isolate,
- RegExpCompiler* compiler,
- uc16 c,
- Label* on_failure,
- int cp_offset,
- bool check,
+static inline bool EmitAtomLetter(RegExpCompiler* compiler, uc16 c,
+ Label* on_failure, int cp_offset, bool check,
bool preloaded) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- bool one_byte = compiler->one_byte();
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
+ int length = GetCaseIndependentLetters(compiler, c, chars);
if (length <= 1) return false;
// We may not need to check against the end of the input string
// if this character lies before a character that matched.
@@ -1749,8 +1744,8 @@ static inline bool EmitAtomLetter(Isolate* isolate,
DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
switch (length) {
case 2: {
- if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
- chars[1], on_failure)) {
+ if (ShortCutEmitCharacterPair(macro_assembler, compiler->one_byte(),
+ chars[0], chars[1], on_failure)) {
} else {
macro_assembler->CheckCharacter(chars[0], &ok);
macro_assembler->CheckNotCharacter(chars[1], on_failure);
@@ -2287,13 +2282,12 @@ int ActionNode::EatsAtLeast(int still_to_find,
not_at_start);
}
-
-void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
+void ActionNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
if (action_type_ == BEGIN_SUBMATCH) {
bm->SetRest(offset);
} else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
- on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
+ on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start);
}
SaveBMInfo(bm, not_at_start, offset);
}
@@ -2314,12 +2308,12 @@ int AssertionNode::EatsAtLeast(int still_to_find,
not_at_start);
}
-
-void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) {
+void AssertionNode::FillInBMInfo(RegExpCompiler* compiler, int offset,
+ int budget, BoyerMooreLookahead* bm,
+ bool not_at_start) {
// Match the behaviour of EatsAtLeast on this node.
if (assertion_type() == AT_START && not_at_start) return;
- on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
+ on_success()->FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start);
SaveBMInfo(bm, not_at_start, offset);
}
@@ -2533,7 +2527,6 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
// Do not collect any quick check details if the text node reads backward,
// since it reads in the opposite direction than we use for quick checks.
if (read_backward()) return;
- Isolate* isolate = compiler->macro_assembler()->isolate();
DCHECK(characters_filled_in < details->characters());
int characters = details->characters();
int char_mask;
@@ -2552,8 +2545,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
uc16 c = quarks[i];
if (compiler->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(isolate, c,
- compiler->one_byte(), chars);
+ int length = GetCaseIndependentLetters(compiler, c, chars);
if (length == 0) {
// This can happen because all case variants are non-Latin1, but we
// know the input is Latin1.
@@ -2758,18 +2750,17 @@ class VisitMarker {
NodeInfo* info_;
};
-
-RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {
+RegExpNode* SeqRegExpNode::FilterOneByte(int depth, RegExpCompiler* compiler) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
VisitMarker marker(info());
- return FilterSuccessor(depth - 1, ignore_case);
+ return FilterSuccessor(depth - 1, compiler);
}
-
-RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
- RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
+RegExpNode* SeqRegExpNode::FilterSuccessor(int depth,
+ RegExpCompiler* compiler) {
+ RegExpNode* next = on_success_->FilterOneByte(depth - 1, compiler);
if (next == NULL) return set_replacement(NULL);
on_success_ = next;
return set_replacement(this);
@@ -2792,8 +2783,30 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
return false;
}
+static uc16 ConvertNonLatin1ToEquivalentLatin1(bool unicode, uc16 c) {
+#ifdef V8_I18N_SUPPORT
+ if (unicode) {
+ USet* set = uset_open(c, c);
+ uset_closeOver(set, USET_CASE_INSENSITIVE);
+ uset_removeAllStrings(set);
+ int length = uset_size(set);
+ uc16 result = 0;
+ for (int i = 0; i < length; i++) {
+ uc32 c = uset_charAt(set, i);
+ if (c <= String::kMaxOneByteCharCode) {
+ result = static_cast<uc16>(c);
+ break;
+ }
+ }
+ uset_close(set);
+ return result;
+ }
+// Fallback to unibrow if ICU is not included.
+#endif // V8_I18N_SUPPORT
+ return unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
+}
-RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
+RegExpNode* TextNode::FilterOneByte(int depth, RegExpCompiler* compiler) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
@@ -2804,16 +2817,17 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
if (elm.text_type() == TextElement::ATOM) {
Vector<const uc16> quarks = elm.atom()->data();
for (int j = 0; j < quarks.length(); j++) {
- uint16_t c = quarks[j];
+ uc16 c = quarks[j];
if (c <= String::kMaxOneByteCharCode) continue;
- if (!ignore_case) return set_replacement(NULL);
+ if (!compiler->ignore_case()) return set_replacement(NULL);
// Here, we need to check for characters whose upper and lower cases
// are outside the Latin-1 range.
- uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
+ uc16 converted =
+ ConvertNonLatin1ToEquivalentLatin1(compiler->unicode(), c);
// Character is outside Latin-1 completely
if (converted == 0) return set_replacement(NULL);
// Convert quark to Latin-1 in place.
- uint16_t* copy = const_cast<uint16_t*>(quarks.start());
+ uc16* copy = const_cast<uc16*>(quarks.start());
copy[j] = converted;
}
} else {
@@ -2828,24 +2842,25 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
ranges->at(0).from() == 0 &&
ranges->at(0).to() >= String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
- if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
+ if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges))
+ continue;
return set_replacement(NULL);
}
} else {
if (range_count == 0 ||
ranges->at(0).from() > String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
- if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
+ if (compiler->ignore_case() && RangesContainLatin1Equivalents(ranges))
+ continue;
return set_replacement(NULL);
}
}
}
}
- return FilterSuccessor(depth - 1, ignore_case);
+ return FilterSuccessor(depth - 1, compiler);
}
-
-RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
+RegExpNode* LoopChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2853,17 +2868,16 @@ RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
VisitMarker marker(info());
RegExpNode* continue_replacement =
- continue_node_->FilterOneByte(depth - 1, ignore_case);
+ continue_node_->FilterOneByte(depth - 1, compiler);
// If we can't continue after the loop then there is no sense in doing the
// loop.
if (continue_replacement == NULL) return set_replacement(NULL);
}
- return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
+ return ChoiceNode::FilterOneByte(depth - 1, compiler);
}
-
-RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
+RegExpNode* ChoiceNode::FilterOneByte(int depth, RegExpCompiler* compiler) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2883,7 +2897,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
for (int i = 0; i < choice_count; i++) {
GuardedAlternative alternative = alternatives_->at(i);
RegExpNode* replacement =
- alternative.node()->FilterOneByte(depth - 1, ignore_case);
+ alternative.node()->FilterOneByte(depth - 1, compiler);
DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
if (replacement != NULL) {
alternatives_->at(i).set_node(replacement);
@@ -2903,7 +2917,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
for (int i = 0; i < choice_count; i++) {
RegExpNode* replacement =
- alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);
+ alternatives_->at(i).node()->FilterOneByte(depth - 1, compiler);
if (replacement != NULL) {
alternatives_->at(i).set_node(replacement);
new_alternatives->Add(alternatives_->at(i), zone());
@@ -2913,9 +2927,8 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
return this;
}
-
-RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
- bool ignore_case) {
+RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(
+ int depth, RegExpCompiler* compiler) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@@ -2923,12 +2936,12 @@ RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
RegExpNode* node = alternatives_->at(1).node();
- RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
+ RegExpNode* replacement = node->FilterOneByte(depth - 1, compiler);
if (replacement == NULL) return set_replacement(NULL);
alternatives_->at(1).set_node(replacement);
RegExpNode* neg_node = alternatives_->at(0).node();
- RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
+ RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, compiler);
// If the negative lookahead is always going to fail then
// we don't need to check it.
if (neg_replacement == NULL) return set_replacement(replacement);
@@ -2949,15 +2962,15 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
not_at_start);
}
-
-void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) {
+void LoopChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset,
+ int budget, BoyerMooreLookahead* bm,
+ bool not_at_start) {
if (body_can_be_zero_length_ || budget <= 0) {
bm->SetRest(offset);
SaveBMInfo(bm, not_at_start, offset);
return;
}
- ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
+ ChoiceNode::FillInBMInfo(compiler, offset, budget - 1, bm, not_at_start);
SaveBMInfo(bm, not_at_start, offset);
}
@@ -3049,7 +3062,6 @@ static void EmitHat(RegExpCompiler* compiler,
// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
- Isolate* isolate = assembler->isolate();
Trace::TriBool next_is_word_character = Trace::UNKNOWN;
bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
BoyerMooreLookahead* lookahead = bm_info(not_at_start);
@@ -3061,7 +3073,7 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
if (eats_at_least >= 1) {
BoyerMooreLookahead* bm =
new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
- FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
+ FillInBMInfo(compiler, 0, kRecursionBudget, bm, not_at_start);
if (bm->at(0)->is_non_word())
next_is_word_character = Trace::FALSE_VALUE;
if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
@@ -3233,7 +3245,6 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
bool first_element_checked,
int* checked_up_to) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
- Isolate* isolate = assembler->isolate();
bool one_byte = compiler->one_byte();
Label* backtrack = trace->backtrack();
QuickCheckDetails* quick_check = trace->quick_check_performed();
@@ -3251,6 +3262,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
switch (pass) {
case NON_LATIN1_MATCH:
DCHECK(one_byte);
+ DCHECK(!(compiler->unicode() && compiler->ignore_case()));
if (quarks[j] > String::kMaxOneByteCharCode) {
assembler->GoTo(backtrack);
return;
@@ -3271,8 +3283,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
if (emit_function != NULL) {
bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
bool bound_checked =
- emit_function(isolate, compiler, quarks[j], backtrack,
- cp_offset + j, bounds_check, preloaded);
+ emit_function(compiler, quarks[j], backtrack, cp_offset + j,
+ bounds_check, preloaded);
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
}
}
@@ -3355,7 +3367,13 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
return;
}
- if (compiler->one_byte()) {
+ if (compiler->one_byte() &&
+ !(compiler->unicode() && compiler->ignore_case())) {
+ // If any character within the text node is outside the Latin1 range, it
+ // cannot possibly match anything in a one-byte string. This still holds
+ // for case-insensitive non-unicode regexp patterns. However, for
+ // case-insensitive unicode regexp patterns, this is no longer true, e.g.
+ // /\u212b/ui matches "\u00c5".
int dummy = 0;
TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
}
@@ -4107,7 +4125,6 @@ int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
DCHECK(trace->is_trivial());
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- Isolate* isolate = macro_assembler->isolate();
// At this point we know that we are at a non-greedy loop that will eat
// any character one at a time. Any non-anchored regexp has such a
// loop prepended to it in order to find where it starts. We look for
@@ -4126,7 +4143,7 @@ int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
compiler,
zone());
GuardedAlternative alt0 = alternatives_->at(0);
- alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
+ alt0.node()->FillInBMInfo(compiler, 0, kRecursionBudget, bm, false);
}
}
if (bm != NULL) {
@@ -6388,9 +6405,8 @@ void Analysis::VisitAssertion(AssertionNode* that) {
EnsureAnalyzed(that->on_success());
}
-
-void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm,
+void BackReferenceNode::FillInBMInfo(RegExpCompiler* compiler, int offset,
+ int budget, BoyerMooreLookahead* bm,
bool not_at_start) {
// Working out the set of characters that a backreference can match is too
// hard, so we just say that any character can match.
@@ -6402,8 +6418,7 @@ void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
RegExpMacroAssembler::kTableSize);
-
-void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
+void ChoiceNode::FillInBMInfo(RegExpCompiler* compiler, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
ZoneList<GuardedAlternative>* alts = alternatives();
budget = (budget - 1) / alts->length();
@@ -6414,14 +6429,14 @@ void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
SaveBMInfo(bm, not_at_start, offset);
return;
}
- alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
+ alt.node()->FillInBMInfo(compiler, offset, budget, bm, not_at_start);
}
SaveBMInfo(bm, not_at_start, offset);
}
-
-void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) {
+void TextNode::FillInBMInfo(RegExpCompiler* compiler, int initial_offset,
+ int budget, BoyerMooreLookahead* bm,
+ bool not_at_start) {
if (initial_offset >= bm->length()) return;
int offset = initial_offset;
int max_char = bm->max_char();
@@ -6441,9 +6456,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
uc16 character = atom->data()[j];
if (bm->compiler()->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(
- isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
- chars);
+ int length = GetCaseIndependentLetters(compiler, character, chars);
for (int j = 0; j < length; j++) {
bm->Set(offset, chars[j]);
}
@@ -6472,7 +6485,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
if (initial_offset == 0) set_bm_info(not_at_start, bm);
return;
}
- on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
+ on_success()->FillInBMInfo(compiler, offset, budget - 1, bm,
true); // Not at start after a text node.
if (initial_offset == 0) set_bm_info(not_at_start, bm);
}
@@ -6630,7 +6643,6 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
return IrregexpRegExpTooBig(isolate);
}
- bool ignore_case = flags & JSRegExp::kIgnoreCase;
bool is_sticky = flags & JSRegExp::kSticky;
bool is_global = flags & JSRegExp::kGlobal;
bool is_unicode = flags & JSRegExp::kUnicode;
@@ -6680,11 +6692,11 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
}
}
if (is_one_byte) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler);
// Do it again to propagate the new nodes to places where they were not
// put because they had not been calculated yet.
if (node != NULL) {
- node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, &compiler);
}
} else if (compiler.unicode() && (is_global || is_sticky)) {
node = OptionallyStepBackToLeadSurrogate(&compiler, node);
« no previous file with comments | « src/regexp/jsregexp.h ('k') | src/regexp/regexp-parser.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698