Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1986)

Unified Diff: src/scanner-base.cc

Issue 7677012: Make scanner handle invalid unicode escapes in identifiers correctly. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Addressed review comments. Fixed bug, updated sputnik expectations. Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/scanner-base.h ('k') | test/mjsunit/regress/regress-1620.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/scanner-base.cc
diff --git a/src/scanner-base.cc b/src/scanner-base.cc
index 2ecbfd2a95be8281c3bd38f93c3720ca0df55318..62eee1a548c2fed21faa0261e7125398c573779f 100644
--- a/src/scanner-base.cc
+++ b/src/scanner-base.cc
@@ -41,12 +41,12 @@ Scanner::Scanner(UnicodeCache* unicode_cache)
: unicode_cache_(unicode_cache) { }
-uc32 Scanner::ScanHexEscape(uc32 c, int length) {
- ASSERT(length <= 4); // prevent overflow
+uc32 Scanner::ScanHexNumber(int expected_length) {
+ ASSERT(expected_length <= 4); // prevent overflow
- uc32 digits[4];
+ uc32 digits[4] = { 0, 0, 0, 0 };
uc32 x = 0;
- for (int i = 0; i < length; i++) {
+ for (int i = 0; i < expected_length; i++) {
digits[i] = c0_;
int d = HexValue(c0_);
if (d < 0) {
@@ -54,12 +54,11 @@ uc32 Scanner::ScanHexEscape(uc32 c, int length) {
// should be illegal, but other JS VMs just return the
// non-escaped version of the original character.
- // Push back digits read, except the last one (in c0_).
+ // Push back digits that we have advanced past.
for (int j = i-1; j >= 0; j--) {
PushBack(digits[j]);
}
- // Notice: No handling of error - treat it as "\u"->"u".
- return c;
+ return -1;
}
x = x * 16 + d;
Advance();
@@ -640,9 +639,17 @@ void JavaScriptScanner::ScanEscape() {
case 'n' : c = '\n'; break;
case 'r' : c = '\r'; break;
case 't' : c = '\t'; break;
- case 'u' : c = ScanHexEscape(c, 4); break;
+ case 'u' : {
+ c = ScanHexNumber(4);
+ if (c < 0) c = 'u';
+ break;
+ }
case 'v' : c = '\v'; break;
- case 'x' : c = ScanHexEscape(c, 2); break;
+ case 'x' : {
+ c = ScanHexNumber(2);
+ if (c < 0) c = 'x';
+ break;
+ }
case '0' : // fall through
case '1' : // fall through
case '2' : // fall through
@@ -802,13 +809,11 @@ Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
Advance();
- if (c0_ != 'u') return unibrow::Utf8::kBadChar;
+ if (c0_ != 'u') return -1;  // Not a "\u" escape: signal failure with a negative value.
Advance();
- uc32 c = ScanHexEscape('u', 4);
- // We do not allow a unicode escape sequence to start another
- // unicode escape sequence.
- if (c == '\\') return unibrow::Utf8::kBadChar;
- return c;
+ uc32 result = ScanHexNumber(4);  // ScanHexNumber returns -1 when it meets a non-hex character.
+ if (result < 0) PushBack('u');  // Restore the 'u' stepped over by the second Advance() above.
+ return result;  // Negative on failure; callers test c < 0 and reject '\\' themselves.
}
@@ -926,7 +931,11 @@ Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier start characters.
- if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL;
+ if (c < 0 ||
+ c == '\\' || // No recursive escapes.
+ !unicode_cache_->IsIdentifierStart(c)) {
+ return Token::ILLEGAL;
+ }
AddLiteralChar(c);
return ScanIdentifierSuffix(&literal);
}
@@ -966,7 +975,11 @@ Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier part characters.
- if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL;
+ if (c < 0 ||
+ c == '\\' ||
+ !unicode_cache_->IsIdentifierPart(c)) {
+ return Token::ILLEGAL;
+ }
AddLiteralChar(c);
} else {
AddLiteralChar(c0_);
@@ -992,8 +1005,9 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
// the scanner should pass uninterpreted bodies to the RegExp
// constructor.
LiteralScope literal(this);
- if (seen_equal)
+ if (seen_equal) {
AddLiteralChar('=');
+ }
while (c0_ != '/' || in_character_class) {
if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false;
@@ -1025,20 +1039,47 @@ bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
}
+bool JavaScriptScanner::ScanLiteralUnicodeEscape() {  // Copies a raw "\uXXXX" escape, undecoded, into the literal buffer; returns false if it is incomplete.
+ ASSERT(c0_ == '\\');  // Precondition: the current character is the backslash.
+ uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};  // Slots 2..5 receive the four hex digits.
+ Advance();
+ int i = 1;  // chars_read[0..i) is the prefix of the escape matched so far.
+ if (c0_ == 'u') {
+ i++;
+ while (i < 6) {
+ Advance();
+ if (!IsHexDigit(c0_)) break;  // Fewer than four hex digits: leave i < 6.
+ chars_read[i] = c0_;
+ i++;
+ }  // NOTE(review): when i reaches 6 no Advance() follows the last stored digit, so c0_ presumably still holds it - confirm the caller steps past it.
+ }
+ if (i < 6) {
+ // Incomplete escape. Undo all advances and return false.
+ while (i > 0) {
+ i--;
+ PushBack(chars_read[i]);  // Matched prefix is pushed back last-read first, ending with the backslash.
+ }
+ return false;
+ }
+ // Complete escape. Add all chars to current literal buffer.
+ for (int i = 0; i < 6; i++) {  // This i shadows the outer counter.
+ AddLiteralChar(chars_read[i]);
+ }
+ return true;
+}
+
+
bool JavaScriptScanner::ScanRegExpFlags() {
// Scan regular expression flags.
LiteralScope literal(this);
while (unicode_cache_->IsIdentifierPart(c0_)) {
- if (c0_ == '\\') {
- uc32 c = ScanIdentifierUnicodeEscape();
- if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
- // We allow any escaped character, unlike the restriction on
- // IdentifierPart when it is used to build an IdentifierName.
- AddLiteralChar(c);
- continue;
+ if (c0_ != '\\') {
+ AddLiteralCharAdvance();
+ } else {
+ if (!ScanLiteralUnicodeEscape()) {
+ break;
}
}
- AddLiteralCharAdvance();
}
literal.Complete();
« no previous file with comments | « src/scanner-base.h ('k') | test/mjsunit/regress/regress-1620.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698