Chromium Code Reviews| Index: src/parser.cc |
| diff --git a/src/parser.cc b/src/parser.cc |
| index bfdeaa3276dbd047236ce6eec09470e55650230c..3f7ce4d4a9c23c4042e55d6c6a4637f324bf1aba 100644 |
| --- a/src/parser.cc |
| +++ b/src/parser.cc |
| @@ -4278,10 +4278,8 @@ void Parser::Internalize() { |
| // Regular expressions |
| -RegExpParser::RegExpParser(FlatStringReader* in, |
| - Handle<String>* error, |
| - bool multiline, |
| - Zone* zone) |
| +RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
| + bool multiline, bool unicode, Zone* zone) |
| : isolate_(zone->isolate()), |
| zone_(zone), |
| error_(error), |
| @@ -4292,6 +4290,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, |
| capture_count_(0), |
| has_more_(true), |
| multiline_(multiline), |
| + unicode_(unicode), |
| simple_(false), |
| contains_anchor_(false), |
| is_scanned_for_captures_(false), |
| @@ -4348,6 +4347,13 @@ bool RegExpParser::simple() { |
| } |
| +bool RegExpParser::IsSyntaxCharacter(uc32 c) { |
| + return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' || |
| + c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' || |
| + c == '{' || c == '}' || c == '|'; |
| +} |
|
mathias
2015/01/08 12:29:07
Should `-` be a “syntax character” as well because … [comment preview truncated]
marja
2015/01/08 13:42:18
The spec (draft rev 30) says:
SyntaxCharacter :: one of ^ $ \ . * + ? ( ) [ ] { } | … [comment preview truncated]
rossberg
2015/01/08 14:11:45
It's not a SyntaxCharacter and that's likely intentional … [comment preview truncated]
mathias
2015/01/21 07:16:54
/[\-]/u is now allowed: https://bugs.ecmascript.org/… [link truncated]
|
| + |
| + |
| RegExpTree* RegExpParser::ReportError(Vector<const char> message) { |
| failed_ = true; |
| *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked(); |
| @@ -4564,9 +4570,15 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
| } |
| uc32 first_digit = Next(); |
| if (first_digit == '8' || first_digit == '9') { |
| - // Treat as identity escape |
| - builder->AddCharacter(first_digit); |
| - Advance(2); |
| + // If the 'u' flag is present, only syntax characters can be escaped, |
| + // no other identity escapes are allowed. If the 'u' flag is not |
| + // present, all identity escapes are allowed. |
| + if (!FLAG_harmony_unicode || !unicode_) { |
| + builder->AddCharacter(first_digit); |
| + Advance(2); |
| + } else { |
| + return ReportError(CStrVector("Invalid escape")); |
| + } |
| break; |
| } |
| } |
| @@ -4622,25 +4634,41 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
| uc32 value; |
| if (ParseHexEscape(2, &value)) { |
| builder->AddCharacter(value); |
| - } else { |
| + } else if (!FLAG_harmony_unicode || !unicode_) { |
| builder->AddCharacter('x'); |
| + } else { |
| + // If the 'u' flag is present, invalid escapes are not treated as |
| + // identity escapes. |
| + return ReportError(CStrVector("Invalid escape")); |
| } |
| break; |
| } |
| case 'u': { |
| Advance(2); |
| uc32 value; |
| - if (ParseHexEscape(4, &value)) { |
| + if (ParseUnicodeEscape(&value)) { |
| builder->AddCharacter(value); |
| - } else { |
| + } else if (!FLAG_harmony_unicode || !unicode_) { |
| builder->AddCharacter('u'); |
| + } else { |
| + // If the 'u' flag is present, invalid escapes are not treated as |
| + // identity escapes. |
| + return ReportError(CStrVector("Invalid unicode escape")); |
| } |
| break; |
| } |
| default: |
| - // Identity escape. |
| - builder->AddCharacter(Next()); |
| - Advance(2); |
| + Advance(); |
| + // If the 'u' flag is present, only syntax characters can be escaped, no |
| + // other identity escapes are allowed. If the 'u' flag is not present, |
| + // all identity escapes are allowed. |
| + if (!FLAG_harmony_unicode || !unicode_ || |
| + IsSyntaxCharacter(current())) { |
| + builder->AddCharacter(current()); |
| + Advance(); |
| + } else { |
| + return ReportError(CStrVector("Invalid escape")); |
| + } |
| break; |
| } |
| break; |
| @@ -4883,11 +4911,10 @@ uc32 RegExpParser::ParseOctalLiteral() { |
| } |
| -bool RegExpParser::ParseHexEscape(int length, uc32 *value) { |
| +bool RegExpParser::ParseHexEscape(int length, uc32* value) { |
| int start = position(); |
| uc32 val = 0; |
| - bool done = false; |
| - for (int i = 0; !done; i++) { |
| + for (int i = 0; i < length; ++i) { |
| uc32 c = current(); |
| int d = HexValue(c); |
| if (d < 0) { |
| @@ -4896,15 +4923,52 @@ bool RegExpParser::ParseHexEscape(int length, uc32 *value) { |
| } |
| val = val * 16 + d; |
| Advance(); |
| - if (i == length - 1) { |
| - done = true; |
| - } |
| } |
| *value = val; |
| return true; |
| } |
| +bool RegExpParser::ParseUnicodeEscape(uc32* value) { |
| + // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are |
| + // allowed). In the latter case, the number of hex digits between { } is |
| + // arbitrary. \ and u have already been read. |
| + if (current() == '{' && FLAG_harmony_unicode && unicode_) { |
| + int start = position(); |
| + Advance(); |
| + if (ParseUnlimitedLengthHexNumber(0x10ffff, value)) { |
| + if (current() == '}') { |
| + Advance(); |
| + return true; |
| + } |
| + } |
| + Reset(start); |
| + return false; |
| + } |
| + // \u but no {, or \u{...} escapes not allowed. |
| + return ParseHexEscape(4, value); |
| +} |
| + |
| + |
| +bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
| + uc32 x = 0; |
| + int d = HexValue(current()); |
| + if (d < 0) { |
| + return false; |
| + } |
| + while (d >= 0) { |
| + x = x * 16 + d; |
| + if (x > max_value) { |
| + return false; |
| + } |
| + Advance(); |
| + d = HexValue(current()); |
| + } |
| + *value = x; |
| + return true; |
| +} |
| + |
| + |
| uc32 RegExpParser::ParseClassCharacterEscape() { |
| DCHECK(current() == '\\'); |
| DCHECK(has_next() && !IsSpecialClassEscape(Next())); |
| @@ -4959,27 +5023,41 @@ uc32 RegExpParser::ParseClassCharacterEscape() { |
| if (ParseHexEscape(2, &value)) { |
| return value; |
| } |
| - // If \x is not followed by a two-digit hexadecimal, treat it |
| - // as an identity escape. |
| - return 'x'; |
| + if (!FLAG_harmony_unicode || !unicode_) { |
| + // If \x is not followed by a two-digit hexadecimal, treat it |
| + // as an identity escape. |
| + return 'x'; |
| + } |
| + // If the 'u' flag is present, invalid escapes are not treated as |
| + // identity escapes. |
| + ReportError(CStrVector("Invalid escape")); |
| + return 0; |
| } |
| case 'u': { |
| Advance(); |
| uc32 value; |
| - if (ParseHexEscape(4, &value)) { |
| + if (ParseUnicodeEscape(&value)) { |
| return value; |
| } |
| - // If \u is not followed by a four-digit hexadecimal, treat it |
| - // as an identity escape. |
| - return 'u'; |
| + if (!FLAG_harmony_unicode || !unicode_) { |
| + return 'u'; |
| + } |
| + // If the 'u' flag is present, invalid escapes are not treated as |
| + // identity escapes. |
| + ReportError(CStrVector("Invalid unicode escape")); |
| + return 0; |
| } |
| default: { |
| - // Extended identity escape. We accept any character that hasn't |
| - // been matched by a more specific case, not just the subset required |
| - // by the ECMAScript specification. |
| uc32 result = current(); |
| - Advance(); |
| - return result; |
| + // If the 'u' flag is present, only syntax characters can be escaped, no |
| + // other identity escapes are allowed. If the 'u' flag is not present, all |
| + // identity escapes are allowed. |
| + if (!FLAG_harmony_unicode || !unicode_ || IsSyntaxCharacter(result)) { |
| + Advance(); |
| + return result; |
| + } |
| + ReportError(CStrVector("Invalid escape")); |
| + return 0; |
| } |
| } |
| return 0; |
| @@ -5085,12 +5163,11 @@ RegExpTree* RegExpParser::ParseCharacterClass() { |
| // ---------------------------------------------------------------------------- |
| // The Parser interface. |
| -bool RegExpParser::ParseRegExp(FlatStringReader* input, |
| - bool multiline, |
| - RegExpCompileData* result, |
| +bool RegExpParser::ParseRegExp(FlatStringReader* input, bool multiline, |
| + bool unicode, RegExpCompileData* result, |
| Zone* zone) { |
| DCHECK(result != NULL); |
| - RegExpParser parser(input, &result->error, multiline, zone); |
| + RegExpParser parser(input, &result->error, multiline, unicode, zone); |
| RegExpTree* tree = parser.ParsePattern(); |
| if (parser.failed()) { |
| DCHECK(tree == NULL); |