src/jsregexp.cc - Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking.

Unified Diff: src/jsregexp.cc

Issue 507051: Attempt to make \b\w+ faster. Slight performance increase on, e.g., string unpacking. (Closed)

Patch Set: Addressed review comments. Created 10 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/jsregexp.cc

diff --git a/src/jsregexp.cc b/src/jsregexp.cc

index 04d194419fd852eed9673c72c322bcec3ed0f0da..83aa4a516633d11ddcf9a93cd770eeaa6ddd3c94 100644

--- a/src/jsregexp.cc

+++ b/src/jsregexp.cc

@@ -1431,14 +1431,6 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

int cp_offset,

bool check_offset,

bool preloaded) {

- if (cc->is_standard() &&

- macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),

- cp_offset,

- check_offset,

- on_failure)) {

- return;

- }

ZoneList<CharacterRange>* ranges = cc->ranges();

int max_char;

if (ascii) {

@@ -1489,6 +1481,12 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,

macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);

}

+ if (cc->is_standard() &&

+ macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),

+ on_failure)) {

+ return;

+ }

for (int i = 0; i < last_valid_range; i++) {

CharacterRange& range = ranges->at(i);

Label next_range;

@@ -1626,8 +1624,8 @@ int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) {

}

-int NegativeLookaheadChoiceNode:: EatsAtLeast(int still_to_find,

- int recursion_depth) {

+int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,

+ int recursion_depth) {

if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;

// Alternative 0 is the negative lookahead, alternative 1 is what comes

// afterwards.

@@ -2049,6 +2047,12 @@ static void EmitWordCheck(RegExpMacroAssembler* assembler,

Label* word,

Label* non_word,

bool fall_through_on_word) {

+ if (assembler->CheckSpecialCharacterClass(

+ fall_through_on_word ? 'w' : 'W',

+ fall_through_on_word ? non_word : word)) {

+ // Optimized implementation available.

+ return;

+ }

assembler->CheckCharacterGT('z', non_word);

assembler->CheckCharacterLT('0', non_word);

assembler->CheckCharacterGT('a' - 1, word);

@@ -2085,17 +2089,60 @@ static void EmitHat(RegExpCompiler* compiler,

assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,

new_trace.backtrack(),

false);

- // Newline means \n, \r, 0x2028 or 0x2029.

- if (!compiler->ascii()) {

- assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

+ if (!assembler->CheckSpecialCharacterClass('n',

+ new_trace.backtrack())) {

+ // Newline means \n, \r, 0x2028 or 0x2029.

+ if (!compiler->ascii()) {

+ assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);

+ }

+ assembler->CheckCharacter('\n', &ok);

+ assembler->CheckNotCharacter('\r', new_trace.backtrack());

}

- assembler->CheckCharacter('\n', &ok);

- assembler->CheckNotCharacter('\r', new_trace.backtrack());

assembler->Bind(&ok);

on_success->Emit(compiler, &new_trace);

}

+// Emit the code to handle \b and \B (word-boundary or non-word-boundary)

+// when we know whether the next character must be a word character or not.

+static void EmitHalfBoundaryCheck(AssertionNode::AssertionNodeType type,

+ RegExpCompiler* compiler,

+ RegExpNode* on_success,

+ Trace* trace) {

+ RegExpMacroAssembler* assembler = compiler->macro_assembler();

+ Label done;

+ Trace new_trace(*trace);

+ bool expect_word_character = (type == AssertionNode::AFTER_WORD_CHARACTER);

+ Label* on_word = expect_word_character ? &done : new_trace.backtrack();

+ Label* on_non_word = expect_word_character ? new_trace.backtrack() : &done;

+ // Check whether previous character was a word character.

+ switch(trace->at_start()) {

+ case Trace::TRUE:

+ if (expect_word_character) {

+ assembler->GoTo(on_non_word);

+ }

+ break;

+ case Trace::UNKNOWN:

+ ASSERT_EQ(0, trace->cp_offset());

+ assembler->CheckAtStart(on_non_word);

+ // Fall through.

+ case Trace::FALSE:

+ int prev_char_offset = trace->cp_offset() - 1;

+ assembler->LoadCurrentCharacter(prev_char_offset, NULL, false, 1);

+ EmitWordCheck(assembler, on_word, on_non_word, expect_word_character);

+ // We may or may not have loaded the previous character.

+ new_trace.InvalidateCurrentCharacter();

+ }

+ assembler->Bind(&done);

+ on_success->Emit(compiler, &new_trace);

// Emit the code to handle \b and \B (word-boundary or non-word-boundary).

static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type,

RegExpCompiler* compiler,

@@ -2205,10 +2252,15 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {

case AFTER_NEWLINE:

EmitHat(compiler, on_success(), trace);

return;

- case AT_NON_BOUNDARY:

case AT_BOUNDARY:

+ case AT_NON_BOUNDARY: {

EmitBoundaryCheck(type_, compiler, on_success(), trace);

return;

+ }

+ case AFTER_WORD_CHARACTER:

+ case AFTER_NONWORD_CHARACTER: {

+ EmitHalfBoundaryCheck(type_, compiler, on_success(), trace);

+ }

}

on_success()->Emit(compiler, trace);

}

@@ -2791,7 +2843,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {

// to generate probably can't use it.

if (i != first_normal_choice) {

alt_gen->expects_preload = false;

- new_trace.set_characters_preloaded(0);

+ new_trace.InvalidateCurrentCharacter();

}

if (i < choice_count - 1) {

new_trace.set_backtrack(&alt_gen->after);

@@ -3282,6 +3334,12 @@ void DotPrinter::VisitAssertion(AssertionNode* that) {

case AssertionNode::AFTER_NEWLINE:

stream()->Add("label=\"(?<=\\n)\", shape=septagon");

break;

+ case AssertionNode::AFTER_WORD_CHARACTER:

+ stream()->Add("label=\"(?<=\\w)\", shape=septagon");

+ break;

+ case AssertionNode::AFTER_NONWORD_CHARACTER:

+ stream()->Add("label=\"(?<=\\W)\", shape=septagon");

+ break;

}

stream()->Add("];\n");

PrintAttributes(that);

@@ -3484,6 +3542,20 @@ bool RegExpCharacterClass::is_standard() {

set_.set_standard_set_type('.');

return true;

}

+ if (CompareRanges(set_.ranges(),

+ kLineTerminatorRanges,

+ kLineTerminatorRangeCount)) {

+ set_.set_standard_set_type('n');

+ return true;

+ }

+ if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {

+ set_.set_standard_set_type('w');

+ return true;

+ }

+ if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {

+ set_.set_standard_set_type('W');

+ return true;

+ }

return false;

}

@@ -4010,6 +4082,101 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,

}

+bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {

+ ASSERT_NOT_NULL(ranges);

+ int n = ranges->length();

+ if (n <= 1) return true;

+ int max = ranges->at(0).to();

+ for (int i = 1; i < n; i++) {

+ CharacterRange next_range = ranges->at(i);

+ if (next_range.from() <= max + 1) return false;

+ max = next_range.to();

+ }

+ return true;

+SetRelation CharacterRange::WordCharacterRelation(

+ ZoneList<CharacterRange>* range) {

+ ASSERT(IsCanonical(range));

+ int i = 0; // Word character range index.

+ int j = 0; // Argument range index.

+ ASSERT_NE(0, kWordRangeCount);

+ SetRelation result;

+ if (range->length() == 0) {

+ result.SetElementsInSecondSet();

+ return result;

+ }

+ CharacterRange argument_range = range->at(0);

+ CharacterRange word_range = CharacterRange(kWordRanges[0], kWordRanges[1]);

+ while (i < kWordRangeCount && j < range->length()) {

+ // Check the two ranges for the five cases:

+ // - no overlap.

+ // - partial overlap (there are elements in both ranges that isn't

+ // in the other, and there are also elements that are in both).

+ // - argument range entirely inside word range.

+ // - word range entirely inside argument range.

+ // - ranges are completely equal.

+ // First check for no overlap. The earlier range is not in the other set.

+ if (argument_range.from() > word_range.to()) {

+ // Ranges are disjoint. The earlier word range contains elements that

+ // cannot be in the argument set.

+ result.SetElementsInSecondSet();

+ } else if (word_range.from() > argument_range.to()) {

+ // Ranges are disjoint. The earlier argument range contains elements that

+ // cannot be in the word set.

+ result.SetElementsInFirstSet();

+ } else if (word_range.from() <= argument_range.from() &&

+ word_range.to() >= argument_range.from()) {

+ result.SetElementsInBothSets();

+ // argument range completely inside word range.

+ if (word_range.from() < argument_range.from() ||

+ word_range.to() > argument_range.from()) {

+ result.SetElementsInSecondSet();

+ }

+ } else if (word_range.from() >= argument_range.from() &&

+ word_range.to() <= argument_range.from()) {

+ result.SetElementsInBothSets();

+ result.SetElementsInFirstSet();

+ } else {

+ // There is overlap, and neither is a subrange of the other

+ result.SetElementsInFirstSet();

+ result.SetElementsInSecondSet();

+ result.SetElementsInBothSets();

+ }

+ if (result.NonTrivialIntersection()) {

+ // The result is as (im)precise as we can possibly make it.

+ return result;

+ }

+ // Progress the range(s) with minimal to-character.

+ uc16 word_to = word_range.to();

+ uc16 argument_to = argument_range.to();

+ if (argument_to <= word_to) {

+ j++;

+ if (j < range->length()) {

+ argument_range = range->at(j);

+ }

+ if (word_to <= argument_to) {

+ i += 2;

+ if (i < kWordRangeCount) {

+ word_range = CharacterRange(kWordRanges[i], kWordRanges[i + 1]);

+ }

+ // Check if anything wasn't compared in the loop.

+ if (i < kWordRangeCount) {

+ // word range contains something not in argument range.

+ result.SetElementsInSecondSet();

+ } else if (j < range->length()){

+ // Argument range contains something not in word range.

+ result.SetElementsInFirstSet();

+ }

+ return result;

static void AddUncanonicals(ZoneList<CharacterRange>* ranges,

int bottom,

int top) {

@@ -4119,6 +4286,287 @@ ZoneList<CharacterRange>* CharacterSet::ranges() {

}

+// Move a number of elements in a zonelist to another position

+// in the same list. Handles overlapping source and target areas.

+static void MoveRanges(ZoneList<CharacterRange>* list,

+ int from,

+ int to,

+ int count) {

+ // Ranges are potentially overlapping.

+ if (from < to) {

+ for (int i = count - 1; i >= 0; i--) {

+ list->at(to + i) = list->at(from + i);

+ }

+ } else {

+ for (int i = 0; i < count; i++) {

+ list->at(to + i) = list->at(from + i);

+ }

+static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,

+ int count,

+ CharacterRange insert) {

+ // Inserts a range into list[0..count[, which must be sorted

+ // by from value and non-overlapping and non-adjacent, using at most

+ // list[0..count] for the result. Returns the number of resulting

+ // canonicalized ranges. Inserting a range may collapse existing ranges into

+ // fewer ranges, so the return value can be anything in the range 1..count+1.

+ uc16 from = insert.from();

+ uc16 to = insert.to();

+ int start_pos = 0;

+ int end_pos = count;

+ for (int i = count - 1; i >= 0; i--) {

+ CharacterRange current = list->at(i);

+ if (current.from() > to + 1) {

+ end_pos = i;

+ } else if (current.to() + 1 < from) {

+ start_pos = i + 1;

+ break;

+ }

+ // Inserted range overlaps, or is adjacent to, ranges at positions

+ // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are

+ // not affected by the insertion.

+ // If start_pos == end_pos, the range must be inserted before start_pos.

+ // if start_pos < end_pos, the entire range from start_pos to end_pos

+ // must be merged with the insert range.

+ if (start_pos == end_pos) {

+ // Insert between existing ranges at position start_pos.

+ if (start_pos < count) {

+ MoveRanges(list, start_pos, start_pos + 1, count - start_pos);

+ }

+ list->at(start_pos) = insert;

+ return count + 1;

+ }

+ if (start_pos + 1 == end_pos) {

+ // Replace single existing range at position start_pos.

+ CharacterRange to_replace = list->at(start_pos);

+ int new_from = Min(to_replace.from(), from);

+ int new_to = Max(to_replace.to(), to);

+ list->at(start_pos) = CharacterRange(new_from, new_to);

+ return count;

+ }

+ // Replace a number of existing ranges from start_pos to end_pos - 1.

+ // Move the remaining ranges down.

+ int new_from = Min(list->at(start_pos).from(), from);

+ int new_to = Max(list->at(end_pos - 1).to(), to);

+ if (end_pos < count) {

+ MoveRanges(list, end_pos, start_pos + 1, count - end_pos);

+ }

+ list->at(start_pos) = CharacterRange(new_from, new_to);

+ return count - (end_pos - start_pos) + 1;

+void CharacterSet::Canonicalize() {

+ // Special/default classes are always considered canonical. The result

+ // of calling ranges() will be sorted.

+ if (ranges_ == NULL) return;

+ CharacterRange::Canonicalize(ranges_);

+void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {

+ if (character_ranges->length() <= 1) return;

+ // Check whether ranges are already canonical (increasing, non-overlapping,

+ // non-adjacent).

+ int n = character_ranges->length();

+ int max = character_ranges->at(0).to();

+ int i = 1;

+ while (i < n) {

+ CharacterRange current = character_ranges->at(i);

+ if (current.from() <= max + 1) {

+ break;

+ }

+ max = current.to();

+ i++;

+ }

+ // Canonical until the i'th range. If that's all of them, we are done.

+ if (i == n) return;

+ // The ranges at index i and forward are not canonicalized. Make them so by

+ // doing the equivalent of insertion sort (inserting each into the previous

+ // list, in order).

+ // Notice that inserting a range can reduce the number of ranges in the

+ // result due to combining of adjacent and overlapping ranges.

+ int read = i; // Range to insert.

+ int num_canonical = i; // Length of canonicalized part of list.

+ do {

+ num_canonical = InsertRangeInCanonicalList(character_ranges,

+ num_canonical,

+ character_ranges->at(read));

+ read++;

+ } while (read < n);

+ character_ranges->Rewind(num_canonical);

+ ASSERT(CharacterRange::IsCanonical(character_ranges));

+// Utility function for CharacterRange::Merge. Adds a range at the end of

+// a canonicalized range list, if necessary merging the range with the last

+// range of the list.

+static void AddRangeToSet(ZoneList<CharacterRange>* set, CharacterRange range) {

+ if (set == NULL) return;

+ ASSERT(set->length() == 0 || set->at(set->length() - 1).to() < range.from());

+ int n = set->length();

+ if (n > 0) {

+ CharacterRange lastRange = set->at(n - 1);

+ if (lastRange.to() == range.from() - 1) {

+ set->at(n - 1) = CharacterRange(lastRange.from(), range.to());

+ return;

+ }

+ set->Add(range);

+static void AddRangeToSelectedSet(int selector,

+ ZoneList<CharacterRange>* first_set,

+ ZoneList<CharacterRange>* second_set,

+ ZoneList<CharacterRange>* intersection_set,

+ CharacterRange range) {

+ switch (selector) {

+ case kInsideFirst:

+ AddRangeToSet(first_set, range);

+ break;

+ case kInsideSecond:

+ AddRangeToSet(second_set, range);

+ break;

+ case kInsideBoth:

+ AddRangeToSet(intersection_set, range);

+ break;

+ }

+void CharacterRange::Merge(ZoneList<CharacterRange>* first_set,

+ ZoneList<CharacterRange>* second_set,

+ ZoneList<CharacterRange>* first_set_only_out,

+ ZoneList<CharacterRange>* second_set_only_out,

+ ZoneList<CharacterRange>* both_sets_out) {

+ // Inputs are canonicalized.

+ ASSERT(CharacterRange::IsCanonical(first_set));

+ ASSERT(CharacterRange::IsCanonical(second_set));

+ // Outputs are empty, if applicable.

+ ASSERT(first_set_only_out == NULL || first_set_only_out->length() == 0);

+ ASSERT(second_set_only_out == NULL || second_set_only_out->length() == 0);

+ ASSERT(both_sets_out == NULL || both_sets_out->length() == 0);

+ // Merge sets by iterating through the lists in order of lowest "from" value,

+ // and putting intervals into one of three sets.

+ if (first_set->length() == 0) {

+ second_set_only_out->AddAll(*second_set);

+ return;

+ }

+ if (second_set->length() == 0) {

+ first_set_only_out->AddAll(*first_set);

+ return;

+ }

+ // Indices into input lists.

+ int i1 = 0;

+ int i2 = 0;

+ // Cache length of input lists.

+ int n1 = first_set->length();

+ int n2 = second_set->length();

+ // Current range. May be invalid if state is kInsideNone.

+ int from = 0;

+ int to = -1;

+ // Where current range comes from.

+ int state = kInsideNone;

+ while (i1 < n1 || i2 < n2) {

+ CharacterRange next_range;

+ int range_source;

+ if (i2 == n2 || first_set->at(i1).from() < second_set->at(i2).from()) {

+ next_range = first_set->at(i1++);

+ range_source = kInsideFirst;

+ } else {

+ next_range = second_set->at(i2++);

+ range_source = kInsideSecond;

+ }

+ if (to < next_range.from()) {

+ // Ranges disjoint: |current| |next|

+ AddRangeToSelectedSet(state,

+ first_set_only_out,

+ second_set_only_out,

+ both_sets_out,

+ CharacterRange(from, to));

+ from = next_range.from();

+ to = next_range.to();

+ state = range_source;

+ } else {

+ if (from < next_range.from()) {

+ AddRangeToSelectedSet(state,

+ first_set_only_out,

+ second_set_only_out,

+ both_sets_out,

+ CharacterRange(from, next_range.from()-1));

+ }

+ if (to < next_range.to()) {

+ // Ranges overlap: |current|

+ // |next|

+ AddRangeToSelectedSet(state | range_source,

+ first_set_only_out,

+ second_set_only_out,

+ both_sets_out,

+ CharacterRange(next_range.from(), to));

+ from = to + 1;

+ to = next_range.to();

+ state = range_source;

+ } else {

+ // Range included: |current| , possibly ending at same character.

+ // |next|

+ AddRangeToSelectedSet(

+ state | range_source,

+ first_set_only_out,

+ second_set_only_out,

+ both_sets_out,

+ CharacterRange(next_range.from(), next_range.to()));

+ from = next_range.to() + 1;

+ // If ranges end at same character, both ranges are consumed completely.

+ if (next_range.to() == to) state = kInsideNone;

+ }

+ AddRangeToSelectedSet(state,

+ first_set_only_out,

+ second_set_only_out,

+ both_sets_out,

+ CharacterRange(from, to));

+void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,

+ ZoneList<CharacterRange>* negated_ranges) {

+ ASSERT(CharacterRange::IsCanonical(ranges));

+ ASSERT_EQ(0, negated_ranges->length());

+ int range_count = ranges->length();

+ uc16 from = 0;

+ int i = 0;

+ if (range_count > 0 && ranges->at(0).from() == 0) {

+ from = ranges->at(0).to();

+ i = 1;

+ }

+ while (i < range_count) {

+ CharacterRange range = ranges->at(i);

+ negated_ranges->Add(CharacterRange(from + 1, range.from() - 1));

+ from = range.to();

+ i++;

+ }

+ if (from < String::kMaxUC16CharCode) {

+ negated_ranges->Add(CharacterRange(from + 1, String::kMaxUC16CharCode));

+ }

// -------------------------------------------------------------------

// Interest propagation

@@ -4410,9 +4858,203 @@ void Analysis::VisitBackReference(BackReferenceNode* that) {

void Analysis::VisitAssertion(AssertionNode* that) {

EnsureAnalyzed(that->on_success());

+ AssertionNode::AssertionNodeType type = that->type();

+ if (type == AssertionNode::AT_BOUNDARY ||

+ type == AssertionNode::AT_NON_BOUNDARY) {

+ // Check if the following character is known to be a word character

+ // or known to not be a word character.

+ ZoneList<CharacterRange>* following_chars = that->FirstCharacterSet();

+ CharacterRange::Canonicalize(following_chars);

+ SetRelation word_relation =

+ CharacterRange::WordCharacterRelation(following_chars);

+ if (word_relation.ContainedIn()) {

+ // Following character is definitely a word character.

+ type = (type == AssertionNode::AT_BOUNDARY) ?

+ AssertionNode::AFTER_NONWORD_CHARACTER :

+ AssertionNode::AFTER_WORD_CHARACTER;

+ that->set_type(type);

+ } else if (word_relation.Disjoint()) {

+ // Following character is definitely *not* a word character.

+ type = (type == AssertionNode::AT_BOUNDARY) ?

+ AssertionNode::AFTER_WORD_CHARACTER :

+ AssertionNode::AFTER_NONWORD_CHARACTER;

+ that->set_type(type);

+ }

}

+ZoneList<CharacterRange>* RegExpNode::FirstCharacterSet() {

+ if (first_character_set_ == NULL) {

+ if (ComputeFirstCharacterSet(kFirstCharBudget) < 0) {

+ // If we can't find an exact solution within the budget, we

+ // set the value to the set of every character, i.e., all characters

+ // are possible.

+ ZoneList<CharacterRange>* all_set = new ZoneList<CharacterRange>(1);

+ all_set->Add(CharacterRange::Everything());

+ first_character_set_ = all_set;

+ }

+ return first_character_set_;

+int RegExpNode::ComputeFirstCharacterSet(int budget) {

+ // Default behavior is to not be able to determine the first character.

+ return kComputeFirstCharacterSetFail;

+int LoopChoiceNode::ComputeFirstCharacterSet(int budget) {

+ budget--;

+ if (budget >= 0) {

+ // Find loop min-iteration. It's the value of the guarded choice node

+ // with a GEQ guard, if any.

+ int min_repetition = 0;

+ for (int i = 0; i <= 1; i++) {

+ GuardedAlternative alternative = alternatives()->at(i);

+ ZoneList<Guard*>* guards = alternative.guards();

+ if (guards != NULL && guards->length() > 0) {

+ Guard* guard = guards->at(0);

+ if (guard->op() == Guard::GEQ) {

+ min_repetition = guard->value();

+ break;

+ }

+ budget = loop_node()->ComputeFirstCharacterSet(budget);

+ if (budget >= 0) {

+ ZoneList<CharacterRange>* character_set =

+ loop_node()->first_character_set();

+ if (body_can_be_zero_length() || min_repetition == 0) {

+ budget = continue_node()->ComputeFirstCharacterSet(budget);

+ if (budget < 0) return budget;

+ ZoneList<CharacterRange>* body_set =

+ continue_node()->first_character_set();

+ ZoneList<CharacterRange>* union_set =

+ new ZoneList<CharacterRange>(Max(character_set->length(),

+ body_set->length()));

+ CharacterRange::Merge(character_set,

+ body_set,

+ union_set,

+ union_set);

+ character_set = union_set;

+ }

+ set_first_character_set(character_set);

+ }

+ return budget;

+int NegativeLookaheadChoiceNode::ComputeFirstCharacterSet(int budget) {

+ budget--;

+ if (budget >= 0) {

+ GuardedAlternative successor = this->alternatives()->at(1);

+ RegExpNode* successor_node = successor.node();

+ budget = successor_node->ComputeFirstCharacterSet(budget);

+ if (budget >= 0) {

+ set_first_character_set(successor_node->first_character_set());

+ }

+ return budget;

+// The first character set of an EndNode is unknowable. Just use the

+// default implementation that fails and returns all characters as possible.

+int AssertionNode::ComputeFirstCharacterSet(int budget) {

+ budget -= 1;

+ if (budget >= 0) {

+ switch (type_) {

+ case AT_END: {

+ set_first_character_set(new ZoneList<CharacterRange>(0));

+ break;

+ }

+ case AT_START:

+ case AT_BOUNDARY:

+ case AT_NON_BOUNDARY:

+ case AFTER_NEWLINE:

+ case AFTER_NONWORD_CHARACTER:

+ case AFTER_WORD_CHARACTER: {

+ ASSERT_NOT_NULL(on_success());

+ budget = on_success()->ComputeFirstCharacterSet(budget);

+ set_first_character_set(on_success()->first_character_set());

+ break;

+ }

+ return budget;

+int ActionNode::ComputeFirstCharacterSet(int budget) {

+ if (type_ == POSITIVE_SUBMATCH_SUCCESS) return kComputeFirstCharacterSetFail;

+ budget--;

+ if (budget >= 0) {

+ ASSERT_NOT_NULL(on_success());

+ budget = on_success()->ComputeFirstCharacterSet(budget);

+ if (budget >= 0) {

+ set_first_character_set(on_success()->first_character_set());

+ }

+ return budget;

+int BackReferenceNode::ComputeFirstCharacterSet(int budget) {

+ // We don't know anything about the first character of a backreference

+ // at this point.

+ return kComputeFirstCharacterSetFail;

+int TextNode::ComputeFirstCharacterSet(int budget) {

+ budget--;

+ if (budget >= 0) {

+ ASSERT_NE(0, elements()->length());

+ TextElement text = elements()->at(0);

+ if (text.type == TextElement::ATOM) {

+ RegExpAtom* atom = text.data.u_atom;

+ ASSERT_NE(0, atom->length());

+ uc16 first_char = atom->data()[0];

+ ZoneList<CharacterRange>* range = new ZoneList<CharacterRange>(1);

+ range->Add(CharacterRange(first_char, first_char));

+ set_first_character_set(range);

+ } else {

+ ASSERT(text.type == TextElement::CHAR_CLASS);

+ RegExpCharacterClass* char_class = text.data.u_char_class;

+ if (char_class->is_negated()) {

+ ZoneList<CharacterRange>* ranges = char_class->ranges();

+ int length = ranges->length();

+ int new_length = length + 1;

+ if (length > 0) {

+ if (ranges->at(0).from() == 0) new_length--;

+ if (ranges->at(length - 1).to() == String::kMaxUC16CharCode) {

+ new_length--;

+ }

+ ZoneList<CharacterRange>* negated_ranges =

+ new ZoneList<CharacterRange>(new_length);

+ CharacterRange::Negate(ranges, negated_ranges);

+ set_first_character_set(negated_ranges);

+ } else {

+ set_first_character_set(char_class->ranges());

+ }

+ return budget;

// -------------------------------------------------------------------

// Dispatch table construction

@@ -4471,7 +5113,6 @@ void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {

}

static int CompareRangeByFrom(const CharacterRange* a,

const CharacterRange* b) {

return Compare<uc16>(a->from(), b->from());

« no previous file with comments | « src/jsregexp.h ('k') | src/parser.cc » ('j') | no next file with comments »