pkg/dart_scanner/lib/src/recover.dart - Issue 2664593002: Port parser and scanner fixes from rasta branch.

Unified Diff: pkg/dart_scanner/lib/src/recover.dart

Issue 2664593002: Port parser and scanner fixes from rasta branch. (Closed)

Patch Set: Update status files. Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« pkg/dart_scanner/lib/src/error_token.dart ('K') | « pkg/dart_scanner/lib/src/keyword.dart ('k') | pkg/dart_scanner/lib/src/token.dart » ('j') | pkg/dart_scanner/lib/src/utf8_bytes_scanner.dart » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: pkg/dart_scanner/lib/src/recover.dart

diff --git a/pkg/dart_scanner/lib/src/recover.dart b/pkg/dart_scanner/lib/src/recover.dart

index 837274717e592b6404814bf5d0b5b8deba51b598..cf32812e4342b7a9fbb25b4b7f437c001f02d2d5 100644

--- a/pkg/dart_scanner/lib/src/recover.dart

+++ b/pkg/dart_scanner/lib/src/recover.dart

@@ -5,8 +5,19 @@

library dart_scanner.recover;

import 'token.dart' show

+ StringToken,

Token;

+import 'error_token.dart' show

+ NonAsciiIdentifierToken,

+ ErrorKind,

+ ErrorToken;

+import 'precedence.dart' as Precedence;

+import 'precedence.dart' show

+ PrecedenceInfo;

/// Recover from errors in [tokens]. The original sources are provided as

/// [bytes]. [lineStarts] are the beginning character offsets of lines, and

/// must be updated if recovery is performed rewriting the original source

@@ -25,5 +36,196 @@ Token defaultRecoveryStrategy(

// [ArrayBasedScanner.discardBeginGroupUntil](array_based_scanner.dart). For

// more details on brace grouping see

// [AbstractScanner.unmatchedBeginGroup](abstract_scanner.dart).

- return tokens;

+ ErrorToken error;

+ ErrorToken errorTail;

+ Token good;

+ Token goodTail;

+ Token beforeGoodTail;

Johnni Winther 2017/01/30 09:04:38 Add doc to these variables.

ahe 2017/01/30 13:26:22 Done.

+ recoverIdentifier(NonAsciiIdentifierToken first) {

+ List codeUnits = <int>[];

+ // True if the previous good token is an identifier and ends right where

+ // [first] starts. This is the case for input like `blåbærgrød`. In this

+ // case, the scanner produces this sequence of tokens:

+ //

+ // [

+ // StringToken("bl"),

+ // NonAsciiIdentifierToken("å"),

+ // StringToken("b"),

+ // NonAsciiIdentifierToken("æ"),

+ // StringToken("rgr"),

+ // NonAsciiIdentifierToken("ø"),

+ // StringToken("d"),

+ // EOF,

+ // ]

+ bool prepend = false;

+ // True if the following token is also an identifier that starts right where

+ // [errorTail] ends. This is the case for "b" above.

+ bool append = false;

+ if (goodTail != null) {

+ if (goodTail.info == Precedence.IDENTIFIER_INFO &&

+ goodTail.charEnd == first.charOffset) {

+ prepend = true;

+ }

+ Token next = errorTail.next;

+ if (next.info == Precedence.IDENTIFIER_INFO &&

+ errorTail.charOffset + 1 == next.charOffset) {

+ append = true;

+ }

+ if (prepend) {

+ codeUnits.addAll(goodTail.value.codeUnits);

+ }

+ NonAsciiIdentifierToken current = first;

+ while (current != errorTail) {

+ codeUnits.add(current.character);

+ current = current.next;

+ }

+ codeUnits.add(errorTail.character);

+ int charOffset = first.charOffset;

+ if (prepend) {

+ charOffset = goodTail.charOffset;

+ if (beforeGoodTail == null) {

+ // We're prepending the first good token, so the new token will become

+ // the first good tooken.

Johnni Winther 2017/01/30 09:04:38 tooken -> token

ahe 2017/01/30 13:26:22 Done.

+ good = null;

+ goodTail = null;

+ beforeGoodTail = null;

+ } else {

+ goodTail = beforeGoodTail;

+ }

+ if (append) {

+ codeUnits.addAll(next.value.codeUnits);

+ next = next.next;

+ }

+ String value = new String.fromCharCodes(codeUnits);

+ Token recovered = synthesizeToken(

+ charOffset, value, Precedence.IDENTIFIER_INFO);

+ recovered.next = next;

+ return recovered;

+ }

+ recoverExponent() {

+ return synthesizeToken(errorTail.charOffset, "NaN", Precedence.DOUBLE_INFO);

+ }

+ recoverString() {

+ // TODO(ahe): Improve this.

+ return skipToEof(errorTail);

+ }

+ recoverHexDigit() {

+ return synthesizeToken(errorTail.charOffset, "-1", Precedence.INT_INFO);

+ }

+ recoverStringInterpolation() {

+ // TODO(ahe): Improve this.

+ return skipToEof(errorTail);

+ }

+ recoverComment() {

+ // TODO(ahe): Improve this.

+ return skipToEof(errorTail);

+ }

+ recoverUnmatched() {

+ // TODO(ahe): Try to use top-level keywords (such as `class`, `typedef`,

+ // and `enum`) and indentation to recover.

+ return errorTail;

+ }

+ for (Token current = tokens; !current.isEof; current = current.next) {

+ if (current is ErrorToken) {

+ ErrorToken first = current;

+ Token next = current;

+ bool treatAsWhitespace = false;

+ do {

+ current = next;

+ if (errorTail == null) {

+ error = next;

+ } else {

+ errorTail.next = next;

+ }

+ errorTail = next;

+ next = next.next;

+ } while (next is ErrorToken && first.errorCode == next.errorCode);

+ switch (first.errorCode) {

+ case ErrorKind.Encoding:

+ case ErrorKind.NonAsciiWhitespace:

+ case ErrorKind.AsciiControlCharacter:

+ treatAsWhitespace = true;

+ break;

+ case ErrorKind.NonAsciiIdentifier:

+ current = recoverIdentifier(first);

+ break;

+ case ErrorKind.MissingExponent:

+ current = recoverExponent();

+ break;

+ case ErrorKind.UnterminatedString:

+ current = recoverString();

+ break;

+ case ErrorKind.ExpectedHexDigit:

+ current = recoverHexDigit();

+ break;

+ case ErrorKind.UnexpectedDollarInString:

+ current = recoverStringInterpolation();

+ break;

+ case ErrorKind.UnterminatedComment:

+ current = recoverComment();

+ break;

+ case ErrorKind.UnmatchedToken:

+ current = recoverUnmatched();

+ break;

+ case ErrorKind.UnterminatedToken: // TODO(ahe): Can this happen?

+ default:

+ treatAsWhitespace = true;

+ break;

+ }

+ if (treatAsWhitespace) continue;

+ }

+ if (goodTail == null) {

+ good = current;

+ } else {

+ goodTail.next = current;

+ }

+ beforeGoodTail = goodTail;

+ goodTail = current;

+ }

+ errorTail.next = good;

+ return error;

+Token synthesizeToken(int charOffset, String value, PrecedenceInfo info) {

+ return new StringToken.fromString(info, value, charOffset);

+Token skipToEof(Token token) {

+ while (!token.isEof) {

+ token = token.next;

+ }

+ return token;

+String closeBraceFor(String openBrace) {

+ return const {

+ '(': ')',

+ '[': ']',

+ '{': '}',

+ '<': '>',

+ r'${': '}',

+ }[openBrace];

}