Index: src/parsing/scanner.cc |
diff --git a/src/parsing/scanner.cc b/src/parsing/scanner.cc |
index 2d5a57958336ef1ec4bb65b8ce63da1efc199a3e..718ec0bec41a01835adbb79e6acddeb3f09e525a 100644 |
--- a/src/parsing/scanner.cc |
+++ b/src/parsing/scanner.cc |
@@ -60,15 +60,19 @@ void Scanner::Initialize(Utf16CharacterStream* source) { |
Scan(); |
} |
- |
-template <bool capture_raw> |
+template <bool capture_raw, bool unicode> |
uc32 Scanner::ScanHexNumber(int expected_length) { |
DCHECK(expected_length <= 4); // prevent overflow |
+ int begin = source_pos() - 2; |
uc32 x = 0; |
for (int i = 0; i < expected_length; i++) { |
int d = HexValue(c0_); |
if (d < 0) { |
+ ReportScannerError(unicode |
+ ? MessageTemplate::kInvalidUnicodeEscapeSequence |
+ : MessageTemplate::kInvalidHexEscapeSequence, |
+ begin, begin + expected_length + 2); |
return -1; |
} |
x = x * 16 + d; |
@@ -78,20 +82,27 @@ uc32 Scanner::ScanHexNumber(int expected_length) { |
return x; |
} |
- |
template <bool capture_raw> |
-uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { |
+uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, bool& bad_codepoint) { |
adamk
2016/03/18 18:21:55
The right type here is bool* in V8
caitp (gmail)
2016/03/18 19:00:36
Don't you think reference ls are better for prevent
adamk
2016/03/18 19:09:57
I think bool args are dangerous in C++ no matter w
caitp (gmail)
2016/03/21 16:15:03
Since the early return was put back, there isn't m
|
uc32 x = 0; |
int d = HexValue(c0_); |
- if (d < 0) { |
- return -1; |
adamk
2016/03/18 18:21:55
Losing this and the below early return makes me a
caitp (gmail)
2016/03/18 19:00:36
the idea is, if you get hex characters until the }
adamk
2016/03/18 19:09:57
Yeah, I'd just prefer a minimal change if the loca
caitp (gmail)
2016/03/21 16:15:03
Done.
|
- } |
+ |
while (d >= 0) { |
x = x * 16 + d; |
- if (x > max_value) return -1; |
+ if (x > max_value) { |
+ bad_codepoint = true; |
+ } |
Advance<capture_raw>(); |
d = HexValue(c0_); |
} |
+ |
+ if (d < 0 && (c0_ != '}')) { |
+ ReportScannerError(x != 0 ? MessageTemplate::kUnclosedUnicodeEscapeSequence |
+ : MessageTemplate::kInvalidUnicodeEscapeSequence, |
+ source_pos()); |
+ return -1; |
+ } |
+ |
return x; |
} |
@@ -847,7 +858,9 @@ Token::Value Scanner::ScanString() { |
uc32 c = c0_; |
Advance(); |
if (c == '\\') { |
- if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL; |
+ if (c0_ < 0 || !ScanEscape<false, false>()) { |
+ return Token::ILLEGAL; |
+ } |
} else { |
AddLiteralChar(c); |
} |
@@ -879,7 +892,6 @@ Token::Value Scanner::ScanTemplateSpan() { |
StartRawLiteral(); |
const bool capture_raw = true; |
const bool in_template_literal = true; |
- |
while (true) { |
uc32 c = c0_; |
Advance<capture_raw>(); |
@@ -1099,18 +1111,21 @@ uc32 Scanner::ScanUnicodeEscape() { |
// Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of |
// hex digits between { } is arbitrary. \ and u have already been read. |
if (c0_ == '{') { |
+ int begin = source_pos() - 2; |
+ bool bad_codepoint = false; |
Advance<capture_raw>(); |
- uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); |
- if (cp < 0) { |
- return -1; |
- } |
- if (c0_ != '}') { |
+ uc32 cp = |
+ ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, bad_codepoint); |
+ if (cp < 0 || bad_codepoint) { |
+ ReportScannerError(MessageTemplate::kUndefinedUnicodeCodePoint, begin, |
+ source_pos() + 1); |
return -1; |
} |
Advance<capture_raw>(); |
return cp; |
} |
- return ScanHexNumber<capture_raw>(4); |
+ const bool unicode = true; |
+ return ScanHexNumber<capture_raw, unicode>(4); |
} |