src/parsing/scanner.cc - Issue 1793913002: [parser] implement error reporting for Scanner errors

Unified Diff: src/parsing/scanner.cc

Issue 1793913002: [parser] implement error reporting for Scanner errors (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: rebased Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/parsing/scanner.cc

diff --git a/src/parsing/scanner.cc b/src/parsing/scanner.cc

index 2d5a57958336ef1ec4bb65b8ce63da1efc199a3e..718ec0bec41a01835adbb79e6acddeb3f09e525a 100644

--- a/src/parsing/scanner.cc

+++ b/src/parsing/scanner.cc

@@ -60,15 +60,19 @@ void Scanner::Initialize(Utf16CharacterStream* source) {

Scan();

}

-template <bool capture_raw>

+template <bool capture_raw, bool unicode>

uc32 Scanner::ScanHexNumber(int expected_length) {

DCHECK(expected_length <= 4); // prevent overflow

+ int begin = source_pos() - 2;

uc32 x = 0;

for (int i = 0; i < expected_length; i++) {

int d = HexValue(c0_);

if (d < 0) {

+ ReportScannerError(unicode

+ ? MessageTemplate::kInvalidUnicodeEscapeSequence

+ : MessageTemplate::kInvalidHexEscapeSequence,

+ begin, begin + expected_length + 2);

return -1;

}

x = x * 16 + d;

@@ -78,20 +82,27 @@ uc32 Scanner::ScanHexNumber(int expected_length) {

return x;

}

template <bool capture_raw>

-uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {

+uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, bool& bad_codepoint) {

adamk 2016/03/18 18:21:55 The right type here is bool* in V8

caitp (gmail) 2016/03/18 19:00:36 don't you think reference ls are better for prevent…

adamk 2016/03/18 19:09:57 I think bool args are dangerous in C++ no matter w

caitp (gmail) 2016/03/21 16:15:03 Since the early return was put back, there isn't m

uc32 x = 0;

int d = HexValue(c0_);

- if (d < 0) {

- return -1;

adamk 2016/03/18 18:21:55 Losing this and the below early return makes me a

caitp (gmail) 2016/03/18 19:00:36 the idea is, if you get hex characters until the }

adamk 2016/03/18 19:09:57 Yeah, I'd just prefer a minimal change if the loca

caitp (gmail) 2016/03/21 16:15:03 Done.

- }

while (d >= 0) {

x = x * 16 + d;

- if (x > max_value) return -1;

+ if (x > max_value) {

+ bad_codepoint = true;

+ }

Advance<capture_raw>();

d = HexValue(c0_);

}

+ if (d < 0 && (c0_ != '}')) {

+ ReportScannerError(x != 0 ? MessageTemplate::kUnclosedUnicodeEscapeSequence

+ : MessageTemplate::kInvalidUnicodeEscapeSequence,

+ source_pos());

+ return -1;

+ }

return x;

}

@@ -847,7 +858,9 @@ Token::Value Scanner::ScanString() {

uc32 c = c0_;

Advance();

if (c == '\\') {

- if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL;

+ if (c0_ < 0 || !ScanEscape<false, false>()) {

+ return Token::ILLEGAL;

+ }

} else {

AddLiteralChar(c);

}

@@ -879,7 +892,6 @@ Token::Value Scanner::ScanTemplateSpan() {

StartRawLiteral();

const bool capture_raw = true;

const bool in_template_literal = true;

while (true) {

uc32 c = c0_;

Advance<capture_raw>();

@@ -1099,18 +1111,21 @@ uc32 Scanner::ScanUnicodeEscape() {

// Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of

// hex digits between { } is arbitrary. \ and u have already been read.

if (c0_ == '{') {

+ int begin = source_pos() - 2;

+ bool bad_codepoint = false;

Advance<capture_raw>();

- uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff);

- if (cp < 0) {

- return -1;

- }

- if (c0_ != '}') {

+ uc32 cp =

+ ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, bad_codepoint);

+ if (cp < 0 || bad_codepoint) {

+ ReportScannerError(MessageTemplate::kUndefinedUnicodeCodePoint, begin,

+ source_pos() + 1);

return -1;

}

Advance<capture_raw>();

return cp;

}

- return ScanHexNumber<capture_raw>(4);

+ const bool unicode = true;

+ return ScanHexNumber<capture_raw, unicode>(4);

}

« src/parsing/scanner.h ('K') | « src/parsing/scanner.h ('k') | test/message/regress/regress-4829-1.out » ('j') | test/message/unicode-escape-invalid.out » ('J')