| Index: src/regexp/regexp-parser.cc
|
| diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
|
| index 91c14cce497bd285e33bcd4896589b08bbe06437..77a741f1e8544a43abc48204bf058d66ba4fcc11 100644
|
| --- a/src/regexp/regexp-parser.cc
|
| +++ b/src/regexp/regexp-parser.cc
|
| @@ -102,28 +102,11 @@
|
|
|
| bool RegExpParser::simple() { return simple_; }
|
|
|
| -bool RegExpParser::IsSyntaxCharacterOrSlash(uc32 c) {
|
| - switch (c) {
|
| - case '^':
|
| - case '$':
|
| - case '\\':
|
| - case '.':
|
| - case '*':
|
| - case '+':
|
| - case '?':
|
| - case '(':
|
| - case ')':
|
| - case '[':
|
| - case ']':
|
| - case '{':
|
| - case '}':
|
| - case '|':
|
| - case '/':
|
| - return true;
|
| - default:
|
| - break;
|
| - }
|
| - return false;
|
| +// True iff c is one of ^ $ \ . * + ? ( ) [ ] { } | (note: '/' is excluded).
|
| +bool RegExpParser::IsSyntaxCharacter(uc32 c) {
|
| + return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' ||
|
| + c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' ||
|
| + c == '{' || c == '}' || c == '|';
|
| }
|
|
|
|
|
| @@ -178,14 +161,14 @@
|
| case kEndMarker:
|
| if (state->IsSubexpression()) {
|
| // Inside a parenthesized group when hitting end of input.
|
| - return ReportError(CStrVector("Unterminated group"));
|
| + ReportError(CStrVector("Unterminated group") CHECK_FAILED);
|
| }
|
| DCHECK_EQ(INITIAL, state->group_type());
|
| // Parsing completed successfully.
|
| return builder->ToRegExp();
|
| case ')': {
|
| if (!state->IsSubexpression()) {
|
| - return ReportError(CStrVector("Unmatched ')'"));
|
| + ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
|
| }
|
| DCHECK_NE(INITIAL, state->group_type());
|
|
|
| @@ -293,12 +276,13 @@
|
| }
|
| // Fall through.
|
| default:
|
| - return ReportError(CStrVector("Invalid group"));
|
| + ReportError(CStrVector("Invalid group") CHECK_FAILED);
|
| + break;
|
| }
|
| Advance(2);
|
| } else {
|
| if (captures_started_ >= kMaxCaptures) {
|
| - return ReportError(CStrVector("Too many captures"));
|
| + ReportError(CStrVector("Too many captures") CHECK_FAILED);
|
| }
|
| captures_started_++;
|
| }
|
| @@ -376,25 +360,24 @@
|
| }
|
| break;
|
| }
|
| - // With /u, no identity escapes except for syntax characters
|
| - // are allowed. Otherwise, all identity escapes are allowed.
|
| - if (unicode()) {
|
| - return ReportError(CStrVector("Invalid escape"));
|
| - }
|
| uc32 first_digit = Next();
|
| if (first_digit == '8' || first_digit == '9') {
|
| - builder->AddCharacter(first_digit);
|
| - Advance(2);
|
| + // If the 'u' flag is present, only syntax characters can
|
| + // be escaped; no other identity escapes are allowed.
|
| + // If the 'u' flag is not present, all identity escapes
|
| + // are allowed.
|
| + if (!unicode()) {
|
| + builder->AddCharacter(first_digit);
|
| + Advance(2);
|
| + } else {
|
| + return ReportError(CStrVector("Invalid escape"));
|
| + }
|
| break;
|
| }
|
| }
|
| // FALLTHROUGH
|
| case '0': {
|
| Advance();
|
| - if (unicode() && Next() >= '0' && Next() <= '9') {
|
| - // With /u, decimal escape with leading 0 are not parsed as octal.
|
| - return ReportError(CStrVector("Invalid decimal escape"));
|
| - }
|
| uc32 octal = ParseOctalLiteral();
|
| builder->AddCharacter(octal);
|
| break;
|
| @@ -432,10 +415,6 @@
|
| // This is outside the specification. We match JSC in
|
| // reading the backslash as a literal character instead
|
| // of as starting an escape.
|
| - if (unicode()) {
|
| - // With /u, invalid escapes are not treated as identity escapes.
|
| - return ReportError(CStrVector("Invalid unicode escape"));
|
| - }
|
| builder->AddCharacter('\\');
|
| } else {
|
| Advance(2);
|
| @@ -451,7 +430,8 @@
|
| } else if (!unicode()) {
|
| builder->AddCharacter('x');
|
| } else {
|
| - // With /u, invalid escapes are not treated as identity escapes.
|
| + // If the 'u' flag is present, invalid escapes are not treated as
|
| + // identity escapes.
|
| return ReportError(CStrVector("Invalid escape"));
|
| }
|
| break;
|
| @@ -464,16 +444,20 @@
|
| } else if (!unicode()) {
|
| builder->AddCharacter('u');
|
| } else {
|
| - // With /u, invalid escapes are not treated as identity escapes.
|
| + // If the 'u' flag is present, invalid escapes are not treated as
|
| + // identity escapes.
|
| return ReportError(CStrVector("Invalid unicode escape"));
|
| }
|
| break;
|
| }
|
| default:
|
| Advance();
|
| - // With /u, no identity escapes except for syntax characters
|
| - // are allowed. Otherwise, all identity escapes are allowed.
|
| - if (!unicode() || IsSyntaxCharacterOrSlash(current())) {
|
| + // If the 'u' flag is present, only syntax
|
| + // characters can be escaped; no other identity
|
| + // escapes are allowed. If the 'u' flag is not
|
| + // present, all identity escapes are allowed
|
| + // (this branch handles the generic case).
|
| + if (!unicode() || IsSyntaxCharacter(current())) {
|
| builder->AddCharacter(current());
|
| Advance();
|
| } else {
|
| @@ -485,16 +469,10 @@
|
| case '{': {
|
| int dummy;
|
| if (ParseIntervalQuantifier(&dummy, &dummy)) {
|
| - return ReportError(CStrVector("Nothing to repeat"));
|
| + ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
|
| }
|
| // fallthrough
|
| }
|
| - case '}':
|
| - case ']':
|
| - if (unicode()) {
|
| - return ReportError(CStrVector("Lone quantifier brackets"));
|
| - }
|
| - // fallthrough
|
| default:
|
| builder->AddUnicodeCharacter(current());
|
| Advance();
|
| @@ -527,15 +505,13 @@
|
| case '{':
|
| if (ParseIntervalQuantifier(&min, &max)) {
|
| if (max < min) {
|
| - return ReportError(
|
| - CStrVector("numbers out of order in {} quantifier"));
|
| + ReportError(CStrVector("numbers out of order in {} quantifier.")
|
| + CHECK_FAILED);
|
| }
|
| break;
|
| - } else if (unicode()) {
|
| - // With /u, incomplete quantifiers are not allowed.
|
| - return ReportError(CStrVector("Incomplete quantifier"));
|
| + } else {
|
| + continue;
|
| }
|
| - continue;
|
| default:
|
| continue;
|
| }
|
| @@ -548,9 +524,7 @@
|
| quantifier_type = RegExpQuantifier::POSSESSIVE;
|
| Advance();
|
| }
|
| - if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {
|
| - return ReportError(CStrVector("Invalid quantifier"));
|
| - }
|
| + builder->AddQuantifierToAtom(min, max, quantifier_type);
|
| }
|
| }
|
|
|
| @@ -848,22 +822,13 @@
|
| case 'c': {
|
| uc32 controlLetter = Next();
|
| uc32 letter = controlLetter & ~('A' ^ 'a');
|
| - // For compatibility with JSC, inside a character class. We also accept
|
| - // digits and underscore as control characters, unless with /u.
|
| - if (letter >= 'A' && letter <= 'Z') {
|
| + // For compatibility with JSC, inside a character class
|
| + // we also accept digits and underscore as control characters.
|
| + if ((controlLetter >= '0' && controlLetter <= '9') ||
|
| + controlLetter == '_' || (letter >= 'A' && letter <= 'Z')) {
|
| Advance(2);
|
| // Control letters mapped to ASCII control characters in the range
|
| // 0x00-0x1f.
|
| - return controlLetter & 0x1f;
|
| - }
|
| - if (unicode()) {
|
| - // With /u, invalid escapes are not treated as identity escapes.
|
| - ReportError(CStrVector("Invalid class escape"));
|
| - return 0;
|
| - }
|
| - if ((controlLetter >= '0' && controlLetter <= '9') ||
|
| - controlLetter == '_') {
|
| - Advance(2);
|
| return controlLetter & 0x1f;
|
| }
|
| // We match JSC in reading the backslash as a literal
|
| @@ -881,43 +846,43 @@
|
| // For compatibility, we interpret a decimal escape that isn't
|
| // a back reference (and therefore either \0 or not valid according
|
| // to the specification) as a 1..3 digit octal character code.
|
| - if (unicode()) {
|
| - // With /u, decimal escape is not interpreted as octal character code.
|
| - ReportError(CStrVector("Invalid class escape"));
|
| - return 0;
|
| - }
|
| return ParseOctalLiteral();
|
| case 'x': {
|
| Advance();
|
| uc32 value;
|
| - if (ParseHexEscape(2, &value)) return value;
|
| - if (unicode()) {
|
| - // With /u, invalid escapes are not treated as identity escapes.
|
| - ReportError(CStrVector("Invalid escape"));
|
| - return 0;
|
| - }
|
| - // If \x is not followed by a two-digit hexadecimal, treat it
|
| - // as an identity escape.
|
| - return 'x';
|
| + if (ParseHexEscape(2, &value)) {
|
| + return value;
|
| + }
|
| + if (!unicode()) {
|
| + // If \x is not followed by a two-digit hexadecimal, treat it
|
| + // as an identity escape.
|
| + return 'x';
|
| + }
|
| + // If the 'u' flag is present, invalid escapes are not treated as
|
| + // identity escapes.
|
| + ReportError(CStrVector("Invalid escape"));
|
| + return 0;
|
| }
|
| case 'u': {
|
| Advance();
|
| uc32 value;
|
| - if (ParseUnicodeEscape(&value)) return value;
|
| - if (unicode()) {
|
| - // With /u, invalid escapes are not treated as identity escapes.
|
| - ReportError(CStrVector("Invalid unicode escape"));
|
| - return 0;
|
| - }
|
| - // If \u is not followed by a two-digit hexadecimal, treat it
|
| - // as an identity escape.
|
| - return 'u';
|
| + if (ParseUnicodeEscape(&value)) {
|
| + return value;
|
| + }
|
| + if (!unicode()) {
|
| + return 'u';
|
| + }
|
| + // If the 'u' flag is present, invalid escapes are not treated as
|
| + // identity escapes.
|
| + ReportError(CStrVector("Invalid unicode escape"));
|
| + return 0;
|
| }
|
| default: {
|
| uc32 result = current();
|
| - // With /u, no identity escapes except for syntax characters are
|
| - // allowed. Otherwise, all identity escapes are allowed.
|
| - if (!unicode() || IsSyntaxCharacterOrSlash(result)) {
|
| + // If the 'u' flag is present, only syntax characters can be escaped, no
|
| + // other identity escapes are allowed. If the 'u' flag is not present, all
|
| + // identity escapes are allowed.
|
| + if (!unicode() || IsSyntaxCharacter(result)) {
|
| Advance();
|
| return result;
|
| }
|
| @@ -991,7 +956,6 @@
|
|
|
| RegExpTree* RegExpParser::ParseCharacterClass() {
|
| static const char* kUnterminated = "Unterminated character class";
|
| - static const char* kRangeInvalid = "Invalid character class";
|
| static const char* kRangeOutOfOrder = "Range out of order in character class";
|
|
|
| DCHECK_EQ(current(), '[');
|
| @@ -1021,18 +985,13 @@
|
| CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
|
| if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
|
| // Either end is an escaped character class. Treat the '-' verbatim.
|
| - if (unicode()) {
|
| - // ES2015 21.2.2.15.1 step 1.
|
| - return ReportError(CStrVector(kRangeInvalid));
|
| - }
|
| AddRangeOrEscape(ranges, char_class, first, zone());
|
| ranges->Add(CharacterRange::Singleton('-'), zone());
|
| AddRangeOrEscape(ranges, char_class_2, next, zone());
|
| continue;
|
| }
|
| - // ES2015 21.2.2.15.1 step 6.
|
| if (first.from() > next.to()) {
|
| - return ReportError(CStrVector(kRangeOutOfOrder));
|
| + return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);
|
| }
|
| ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());
|
| } else {
|
| @@ -1040,7 +999,7 @@
|
| }
|
| }
|
| if (!has_more()) {
|
| - return ReportError(CStrVector(kUnterminated));
|
| + return ReportError(CStrVector(kUnterminated) CHECK_FAILED);
|
| }
|
| Advance();
|
| if (ranges->length() == 0) {
|
| @@ -1203,7 +1162,7 @@
|
|
|
| void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
|
| if (NeedsDesugaringForUnicode(cc)) {
|
| - // With /u, character class needs to be desugared, so it
|
| + // In unicode mode, character class needs to be desugared, so it
|
| // must be a standalone term instead of being part of a RegExpText.
|
| AddTerm(cc);
|
| } else {
|
| @@ -1316,12 +1275,13 @@
|
| return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));
|
| }
|
|
|
| -bool RegExpBuilder::AddQuantifierToAtom(
|
| +
|
| +void RegExpBuilder::AddQuantifierToAtom(
|
| int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {
|
| FlushPendingSurrogate();
|
| if (pending_empty_) {
|
| pending_empty_ = false;
|
| - return true;
|
| + return;
|
| }
|
| RegExpTree* atom;
|
| if (characters_ != NULL) {
|
| @@ -1344,26 +1304,23 @@
|
| } else if (terms_.length() > 0) {
|
| DCHECK(last_added_ == ADD_ATOM);
|
| atom = terms_.RemoveLast();
|
| - // With /u, lookarounds are not quantifiable.
|
| - if (unicode() && atom->IsLookaround()) return false;
|
| if (atom->max_match() == 0) {
|
| // Guaranteed to only match an empty string.
|
| LAST(ADD_TERM);
|
| if (min == 0) {
|
| - return true;
|
| + return;
|
| }
|
| terms_.Add(atom, zone());
|
| - return true;
|
| + return;
|
| }
|
| } else {
|
| // Only call immediately after adding an atom or character!
|
| UNREACHABLE();
|
| - return false;
|
| + return;
|
| }
|
| terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
|
| zone());
|
| LAST(ADD_TERM);
|
| - return true;
|
| }
|
|
|
| } // namespace internal
|
|
|