Index: src/jsregexp.cc |
diff --git a/src/jsregexp.cc b/src/jsregexp.cc |
index 5a0a482091091e188afc75844657fd1d91ea4f9f..dd3a9ae7055c28bef43aa6fe961761ac7856f636 100644 |
--- a/src/jsregexp.cc |
+++ b/src/jsregexp.cc |
@@ -4987,7 +4987,9 @@ int AssertionNode::ComputeFirstCharacterSet(int budget) { |
case AFTER_WORD_CHARACTER: { |
ASSERT_NOT_NULL(on_success()); |
budget = on_success()->ComputeFirstCharacterSet(budget); |
- set_first_character_set(on_success()->first_character_set()); |
+ if (budget >= 0) { |
+ set_first_character_set(on_success()->first_character_set()); |
+ } |
break; |
} |
} |
@@ -5013,6 +5015,10 @@ int ActionNode::ComputeFirstCharacterSet(int budget) { |
int BackReferenceNode::ComputeFirstCharacterSet(int budget) { |
// We don't know anything about the first character of a backreference |
// at this point. |
+ // A precise set would merge the first characters of the capture with |
+ // those of the on_success node, depending on whether the capture can be |
+ // empty and whether it is known to participate or known not to. None of |
+ // that is computed here: budget is unused and the fail constant returned. |
return kComputeFirstCharacterSetFail; |
} |
@@ -5032,8 +5038,9 @@ int TextNode::ComputeFirstCharacterSet(int budget) { |
} else { |
ASSERT(text.type == TextElement::CHAR_CLASS); |
RegExpCharacterClass* char_class = text.data.u_char_class; |
+ ZoneList<CharacterRange>* ranges = char_class->ranges(); |
+ CharacterRange::Canonicalize(ranges); |
if (char_class->is_negated()) { |
- ZoneList<CharacterRange>* ranges = char_class->ranges(); |
int length = ranges->length(); |
int new_length = length + 1; |
if (length > 0) { |
@@ -5047,7 +5054,9 @@ int TextNode::ComputeFirstCharacterSet(int budget) { |
CharacterRange::Negate(ranges, negated_ranges); |
set_first_character_set(negated_ranges); |
} else { |
- set_first_character_set(char_class->ranges()); |
+ // TODO(lrn): Canonicalize ranges when they are created |
+ // instead of waiting until now. |
+ set_first_character_set(ranges); |
} |
} |
} |