Index: src/parser.cc |
diff --git a/src/parser.cc b/src/parser.cc |
index bfdeaa3276dbd047236ce6eec09470e55650230c..3f7ce4d4a9c23c4042e55d6c6a4637f324bf1aba 100644 |
--- a/src/parser.cc |
+++ b/src/parser.cc |
@@ -4278,10 +4278,8 @@ void Parser::Internalize() { |
// Regular expressions |
-RegExpParser::RegExpParser(FlatStringReader* in, |
- Handle<String>* error, |
- bool multiline, |
- Zone* zone) |
+RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
+ bool multiline, bool unicode, Zone* zone) |
: isolate_(zone->isolate()), |
zone_(zone), |
error_(error), |
@@ -4292,6 +4290,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, |
capture_count_(0), |
has_more_(true), |
multiline_(multiline), |
+ unicode_(unicode), |
simple_(false), |
contains_anchor_(false), |
is_scanned_for_captures_(false), |
@@ -4348,6 +4347,13 @@ bool RegExpParser::simple() { |
} |
+// Returns true if |c| may follow a backslash as an identity escape when the |
+// 'u' flag is in effect. The accepted set is the ES2015 SyntaxCharacter set |
+//   ^ $ \ . * + ? ( ) [ ] { } | |
+// plus '/', which IdentityEscape[U] permits in addition to SyntaxCharacter. |
+// Without '/', the escape checks that consult this predicate would reject a |
+// pattern such as /\//u with "Invalid escape". |
+bool RegExpParser::IsSyntaxCharacter(uc32 c) { |
+  return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' || |
+         c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' || |
+         c == '{' || c == '}' || c == '|' || c == '/'; |
+} |
+ |
+ |
RegExpTree* RegExpParser::ReportError(Vector<const char> message) { |
failed_ = true; |
*error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked(); |
@@ -4564,9 +4570,15 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
} |
uc32 first_digit = Next(); |
if (first_digit == '8' || first_digit == '9') { |
- // Treat as identity escape |
- builder->AddCharacter(first_digit); |
- Advance(2); |
+ // If the 'u' flag is present, only syntax characters can be escaped, |
+ // no other identity escapes are allowed. If the 'u' flag is not |
+ // present, all identity escapes are allowed. |
+ if (!FLAG_harmony_unicode || !unicode_) { |
+ builder->AddCharacter(first_digit); |
+ Advance(2); |
+ } else { |
+ return ReportError(CStrVector("Invalid escape")); |
+ } |
break; |
} |
} |
@@ -4622,25 +4634,41 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
uc32 value; |
if (ParseHexEscape(2, &value)) { |
builder->AddCharacter(value); |
- } else { |
+ } else if (!FLAG_harmony_unicode || !unicode_) { |
builder->AddCharacter('x'); |
+ } else { |
+ // If the 'u' flag is present, invalid escapes are not treated as |
+ // identity escapes. |
+ return ReportError(CStrVector("Invalid escape")); |
} |
break; |
} |
case 'u': { |
Advance(2); |
uc32 value; |
- if (ParseHexEscape(4, &value)) { |
+ if (ParseUnicodeEscape(&value)) { |
builder->AddCharacter(value); |
- } else { |
+ } else if (!FLAG_harmony_unicode || !unicode_) { |
builder->AddCharacter('u'); |
+ } else { |
+ // If the 'u' flag is present, invalid escapes are not treated as |
+ // identity escapes. |
+ return ReportError(CStrVector("Invalid unicode escape")); |
} |
break; |
} |
default: |
- // Identity escape. |
- builder->AddCharacter(Next()); |
- Advance(2); |
+ Advance(); |
+ // If the 'u' flag is present, only syntax characters can be escaped, no |
+ // other identity escapes are allowed. If the 'u' flag is not present, |
+ // all identity escapes are allowed. |
+ if (!FLAG_harmony_unicode || !unicode_ || |
+ IsSyntaxCharacter(current())) { |
+ builder->AddCharacter(current()); |
+ Advance(); |
+ } else { |
+ return ReportError(CStrVector("Invalid escape")); |
+ } |
break; |
} |
break; |
@@ -4883,11 +4911,10 @@ uc32 RegExpParser::ParseOctalLiteral() { |
} |
-bool RegExpParser::ParseHexEscape(int length, uc32 *value) { |
+bool RegExpParser::ParseHexEscape(int length, uc32* value) { |
int start = position(); |
uc32 val = 0; |
- bool done = false; |
- for (int i = 0; !done; i++) { |
+ for (int i = 0; i < length; ++i) { |
uc32 c = current(); |
int d = HexValue(c); |
if (d < 0) { |
@@ -4896,15 +4923,52 @@ bool RegExpParser::ParseHexEscape(int length, uc32 *value) { |
} |
val = val * 16 + d; |
Advance(); |
- if (i == length - 1) { |
- done = true; |
- } |
} |
*value = val; |
return true; |
} |
+// Parses the remainder of a unicode escape; the leading '\' and 'u' have |
+// already been consumed by the caller. Two forms are understood: |
+//   \uxxxx     - exactly four hex digits, delegated to ParseHexEscape. |
+//   \u{x...}   - any number of hex digits with a value of at most 0x10ffff; |
+//                only recognized when FLAG_harmony_unicode is on and the |
+//                pattern carries the 'u' flag. |
+// Returns true and stores the code point in |*value| on success. When the |
+// braced form fails, the position is rewound to the '{' and false is |
+// returned; |*value| may already have been written in that case. |
+bool RegExpParser::ParseUnicodeEscape(uc32* value) { |
+  const bool braced_form = |
+      current() == '{' && FLAG_harmony_unicode && unicode_; |
+  if (!braced_form) { |
+    // Plain \u, or \u{...} escapes are not enabled. |
+    return ParseHexEscape(4, value); |
+  } |
+ |
+  const int brace_position = position(); |
+  Advance();  // Skip '{'. |
+  if (ParseUnlimitedLengthHexNumber(0x10ffff, value) && current() == '}') { |
+    Advance();  // Skip '}'. |
+    return true; |
+  } |
+  Reset(brace_position); |
+  return false; |
+} |
+ |
+ |
+// Reads a run of one or more hex digits starting at the current position and |
+// accumulates them into a number. Returns false if the current character is |
+// not a hex digit, or as soon as the accumulated number exceeds |max_value|. |
+// On success the number is stored in |*value| and the position is left on |
+// the first non-hex character. This function never rewinds: on the overflow |
+// failure the digits consumed so far stay consumed, so callers that need the |
+// old position must restore it themselves. |
+bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
+  int digit = HexValue(current()); |
+  if (digit < 0) { |
+    return false; |
+  } |
+  uc32 number = 0; |
+  do { |
+    number = number * 16 + digit; |
+    if (number > max_value) { |
+      return false; |
+    } |
+    Advance(); |
+    digit = HexValue(current()); |
+  } while (digit >= 0); |
+  *value = number; |
+  return true; |
+} |
+ |
+ |
uc32 RegExpParser::ParseClassCharacterEscape() { |
DCHECK(current() == '\\'); |
DCHECK(has_next() && !IsSpecialClassEscape(Next())); |
@@ -4959,27 +5023,41 @@ uc32 RegExpParser::ParseClassCharacterEscape() { |
if (ParseHexEscape(2, &value)) { |
return value; |
} |
- // If \x is not followed by a two-digit hexadecimal, treat it |
- // as an identity escape. |
- return 'x'; |
+ if (!FLAG_harmony_unicode || !unicode_) { |
+ // If \x is not followed by a two-digit hexadecimal, treat it |
+ // as an identity escape. |
+ return 'x'; |
+ } |
+ // If the 'u' flag is present, invalid escapes are not treated as |
+ // identity escapes. |
+ ReportError(CStrVector("Invalid escape")); |
+ return 0; |
} |
case 'u': { |
Advance(); |
uc32 value; |
- if (ParseHexEscape(4, &value)) { |
+ if (ParseUnicodeEscape(&value)) { |
return value; |
} |
- // If \u is not followed by a four-digit hexadecimal, treat it |
- // as an identity escape. |
- return 'u'; |
+ if (!FLAG_harmony_unicode || !unicode_) { |
+ return 'u'; |
+ } |
+ // If the 'u' flag is present, invalid escapes are not treated as |
+ // identity escapes. |
+ ReportError(CStrVector("Invalid unicode escape")); |
+ return 0; |
} |
default: { |
- // Extended identity escape. We accept any character that hasn't |
- // been matched by a more specific case, not just the subset required |
- // by the ECMAScript specification. |
uc32 result = current(); |
- Advance(); |
- return result; |
+ // If the 'u' flag is present, only syntax characters can be escaped, no |
+ // other identity escapes are allowed. If the 'u' flag is not present, all |
+ // identity escapes are allowed. |
+ if (!FLAG_harmony_unicode || !unicode_ || IsSyntaxCharacter(result)) { |
+ Advance(); |
+ return result; |
+ } |
+ ReportError(CStrVector("Invalid escape")); |
+ return 0; |
} |
} |
return 0; |
@@ -5085,12 +5163,11 @@ RegExpTree* RegExpParser::ParseCharacterClass() { |
// ---------------------------------------------------------------------------- |
// The Parser interface. |
-bool RegExpParser::ParseRegExp(FlatStringReader* input, |
- bool multiline, |
- RegExpCompileData* result, |
+bool RegExpParser::ParseRegExp(FlatStringReader* input, bool multiline, |
+ bool unicode, RegExpCompileData* result, |
Zone* zone) { |
DCHECK(result != NULL); |
- RegExpParser parser(input, &result->error, multiline, zone); |
+ RegExpParser parser(input, &result->error, multiline, unicode, zone); |
RegExpTree* tree = parser.ParsePattern(); |
if (parser.failed()) { |
DCHECK(tree == NULL); |