Index: src/parser.cc |
diff --git a/src/parser.cc b/src/parser.cc |
index 056332b5b44ec8de7049aa392daa1ddda57e0ebb..5473f25164aef9019015e8f71c9b2b3c01fb423b 100644 |
--- a/src/parser.cc |
+++ b/src/parser.cc |
@@ -4409,10 +4409,25 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) { |
} |
+static const uc16 kNoCharClass = 0; |
+ |
+// Adds range or pre-defined character class to character ranges. |
+// If char_class is not kInvalidClass, it's interpreted as a class |
+// escape (i.e., 's' means whitespace, from '\s'). |
+static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, |
+ uc16 char_class, |
+ CharacterRange range) { |
+ if (char_class != kNoCharClass) { |
+ CharacterRange::AddClassEscape(char_class, ranges); |
+ } else { |
+ ranges->Add(range); |
+ } |
+} |
+ |
+ |
RegExpTree* RegExpParser::ParseCharacterClass() { |
static const char* kUnterminated = "Unterminated character class"; |
static const char* kRangeOutOfOrder = "Range out of order in character class"; |
- static const char* kInvalidRange = "Invalid character range"; |
ASSERT_EQ(current(), '['); |
Advance(); |
@@ -4421,30 +4436,10 @@ RegExpTree* RegExpParser::ParseCharacterClass() { |
is_negated = true; |
Advance(); |
} |
- // A CharacterClass is a sequence of single characters, character class |
- // escapes or ranges. Ranges are on the form "x-y" where x and y are |
- // single characters (and not character class escapes like \s). |
- // A "-" may occur at the start or end of the character class (just after |
- // "[" or "[^", or just before "]") without being considered part of a |
- // range. A "-" may also appear as the beginning or end of a range. |
- // I.e., [--+] is valid, so is [!--]. |
- |
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
while (has_more() && current() != ']') { |
- uc16 char_class = 0; |
+ uc16 char_class = kNoCharClass; |
CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); |
- if (char_class) { |
- CharacterRange::AddClassEscape(char_class, ranges); |
- if (current() == '-') { |
- Advance(); |
- ranges->Add(CharacterRange::Singleton('-')); |
- if (current() != ']') { |
- ReportError(CStrVector(kInvalidRange) CHECK_FAILED); |
- } |
- break; |
- } |
- continue; |
- } |
if (current() == '-') { |
Advance(); |
if (current() == kEndMarker) { |
@@ -4452,20 +4447,25 @@ RegExpTree* RegExpParser::ParseCharacterClass() { |
// following code report an error. |
break; |
} else if (current() == ']') { |
- ranges->Add(first); |
+ AddRangeOrEscape(ranges, char_class, first); |
ranges->Add(CharacterRange::Singleton('-')); |
break; |
} |
- CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED); |
- if (char_class) { |
- ReportError(CStrVector(kInvalidRange) CHECK_FAILED); |
+ uc16 char_class_2 = kNoCharClass; |
+ CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); |
+ if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { |
+ // Either end is an escaped character class. Treat the '-' verbatim. |
+ AddRangeOrEscape(ranges, char_class, first); |
+ ranges->Add(CharacterRange::Singleton('-')); |
+ AddRangeOrEscape(ranges, char_class_2, next); |
+ continue; |
} |
if (first.from() > next.to()) { |
return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); |
} |
ranges->Add(CharacterRange::Range(first.from(), next.to())); |
} else { |
- ranges->Add(first); |
+ AddRangeOrEscape(ranges, char_class, first); |
} |
} |
if (!has_more()) { |