sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart - Issue 27510003: Scanner for UTF-8 byte arrays

Unified Diff: sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart

Issue 27510003: Scanner for UTF-8 byte arrays (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: fixes compiler tests Created 7 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« sdk/lib/_internal/compiler/implementation/js_backend/backend.dart ('K') | « sdk/lib/_internal/compiler/implementation/patch_parser.dart ('k') | sdk/lib/_internal/compiler/implementation/scanner/parser.dart » ('j') | sdk/lib/_internal/compiler/implementation/scanner/scanner.dart » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart

diff --git a/sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart b/sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart

index 01f8e9e8159abd4b2dc3286e9eafff04d9858afc..5eb9129e86f11e8cfd69a843048d1135409a8425 100644

--- a/sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart

+++ b/sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart

@@ -2,35 +2,49 @@

// BSD-style license that can be found in the LICENSE file.

-part of scanner_implementation;

-abstract

-class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {

- int get charOffset => byteOffset + extraCharOffset;

- final Token tokens;

- Token tail;

- int tokenStart;

- int byteOffset;

- final bool includeComments;

- /** Since the input is UTF8, some characters are represented by more

- * than one byte. [extraCharOffset] tracks the difference. */

- int extraCharOffset;

+part of scanner;

+abstract class ArrayBasedScanner extends AbstractScanner {

+ ArrayBasedScanner(SourceFile file, bool includeComments)

+ : super(file, includeComments);

+ /**

+ * The stack of open groups, e.g. [: { ... ( .. :]

+ * Each BeginGroupToken has a pointer to the token where the group

+ * ends. This field is set when scanning the end group token.

+ */

Link<BeginGroupToken> groupingStack = const Link<BeginGroupToken>();

- ArrayBasedScanner(this.includeComments)

- : this.extraCharOffset = 0,

- this.tokenStart = -1,

- this.byteOffset = -1,

- this.tokens = new Token(EOF_INFO, -1) {

- this.tail = this.tokens;

+ /**

+ * Appends a token whose kind is determined by [info] and content is defined

+ * by the String [value].

+ *

+ * This method is invoked for class names, field names, method names, types,

+ * etc.

+ */

+ void appendStringToken(PrecedenceInfo info, String value) {

+ tail.next = new StringToken.fromString(info, value, tokenStart, true);

+ tail = tail.next;

}

- int advance() {

- int next = nextByte();

- return next;

+ /**

+ * Appends a fixed token whose kind and content are determined by [info].

+ * Appends an *operator* token from [info].

+ *

+ * An operator token represents operators like ':', '.', ';', '&&', '==', '--',

+ * '=>', etc.

+ */

+ void appendPrecedenceToken(PrecedenceInfo info) {

+ tail.next = new SymbolToken(info, tokenStart);

+ tail = tail.next;

}

+ /**

+ * Appends a fixed token based on whether the current char is [choice] or not.

+ * If the current char is [choice], a fixed token whose kind and content

+ * are determined by [yes] is appended; otherwise a fixed token whose kind

+ * and content are determined by [no] is appended.

+ */

int select(int choice, PrecedenceInfo yes, PrecedenceInfo no) {

int next = advance();

if (identical(next, choice)) {

@@ -42,27 +56,22 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {

}

- void appendPrecedenceToken(PrecedenceInfo info) {

- tail.next = new Token(info, tokenStart);

- tail = tail.next;

- }

- void appendStringToken(PrecedenceInfo info, String value) {

- tail.next = new StringToken(info, value, tokenStart);

- tail = tail.next;

- }

+ /**

+ * Appends a keyword token whose kind is determined by [keyword].

+ */

void appendKeywordToken(Keyword keyword) {

String syntax = keyword.syntax;

// Type parameters and arguments cannot contain 'this' or 'super'.

- if (identical(syntax, 'this') || identical(syntax, 'super')) discardOpenLt();

+ if (identical(syntax, 'this') || identical(syntax, 'super')) {

+ discardOpenLt();

+ }

tail.next = new KeywordToken(keyword, tokenStart);

tail = tail.next;

}

void appendEofToken() {

- tail.next = new Token(EOF_INFO, charOffset);

+ beginToken();

+ tail.next = new SymbolToken(EOF_INFO, tokenStart);

tail = tail.next;

// EOF points to itself so there's always infinite look-ahead.

tail.next = tail;

@@ -73,37 +82,54 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {

}

- void beginToken() {

- tokenStart = charOffset;

- }

- Token firstToken() {

- return tokens.next;

- }

- Token previousToken() {

- return tail;

- }

- void addToCharOffset(int offset) {

- extraCharOffset += offset;

+ /**

+ * Notifies that a whitespace character was scanned. Note that [appendWhiteSpace] is

+ * not always invoked for [$SPACE] characters.

+ *

+ * This method is used by the scanners to track line breaks and create the

+ * [lineStarts] map.

+ */

+ void appendWhiteSpace(int next) {

+ if (next == $LF && file != null) {

+ lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.

+ }

}

- void appendWhiteSpace(int next) {

- // Do nothing, we don't collect white space.

+ /**

+ * Notifies on [$LF] characters in multi-line comments or strings.

ngeoffray 2013/10/18 10:19:37 commends -> comments

lukas 2013/10/24 16:48:36 Done.

+ *

+ * This method is used by the scanners to track line breaks and create the

+ * [lineStarts] map.

+ */

+ void lineFeedInMultiline() {

+ if (file != null) {

+ lineStarts.add(stringOffset + 1);

+ }

}

- void appendBeginGroup(PrecedenceInfo info, String value) {

- Token token = new BeginGroupToken(info, value, tokenStart);

+ /**

+ * Appends a token that begins a new group, represented by [info].

+ * Group begin tokens are '{', '(', '[' and '${'.

+ */

+ void appendBeginGroup(PrecedenceInfo info) {

+ Token token = new BeginGroupToken(info, tokenStart);

tail.next = token;

tail = tail.next;

+ // { ( [ ${ cannot appear inside type parameters / arguments.

if (!identical(info.kind, LT_TOKEN)) discardOpenLt();

groupingStack = groupingStack.prepend(token);

}

- int appendEndGroup(PrecedenceInfo info, String value, int openKind) {

- assert(!identical(openKind, LT_TOKEN));

- appendStringToken(info, value);

+ /**

+ * Appends a token that ends a group, represented by [info].

ngeoffray 2013/10/18 10:19:37 a ends -> an end

lukas 2013/10/24 16:48:36 Done.

+ * It handles the group end tokens '}', ')' and ']'. The tokens '>' and

+ * '>>' are handled separately by [appendGt] and [appendGtGt].

+ */

+ int appendEndGroup(PrecedenceInfo info, int openKind) {

+ assert(!identical(openKind, LT_TOKEN)); // openKind is < for > and >>

+ appendPrecedenceToken(info);

+ // Don't report unmatched errors for <; it is also the less-than operator.

discardOpenLt();

if (groupingStack.isEmpty) {

return advance();

@@ -113,7 +139,8 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {

if (!identical(openKind, OPEN_CURLY_BRACKET_TOKEN) ||

!identical(begin.kind, STRING_INTERPOLATION_TOKEN)) {

// Not ending string interpolation.

- return error(new SourceString('Unmatched ${begin.stringValue}'));

+ unmatchedBeginGroup(begin);

+ return advance();

}

// We're ending an interpolated expression.

begin.endGroup = tail;

@@ -127,21 +154,13 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {

return advance();

}

- void appendGt(PrecedenceInfo info, String value) {

- appendStringToken(info, value);

- if (groupingStack.isEmpty) return;

- if (identical(groupingStack.head.kind, LT_TOKEN)) {

- groupingStack.head.endGroup = tail;

- groupingStack = groupingStack.tail;

- }

- void appendGtGt(PrecedenceInfo info, String value) {

- appendStringToken(info, value);

- if (groupingStack.isEmpty) return;

- if (identical(groupingStack.head.kind, LT_TOKEN)) {

- groupingStack = groupingStack.tail;

- }

+ /**

+ * Appends a token for '>'.

+ * This method does not issue unmatched errors, because > is also the

+ * greater-than operator. It does not necessarily have to close a group.

+ */

+ void appendGt(PrecedenceInfo info) {

+ appendPrecedenceToken(info);

if (groupingStack.isEmpty) return;

if (identical(groupingStack.head.kind, LT_TOKEN)) {

groupingStack.head.endGroup = tail;

@@ -149,14 +168,17 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {

}

- void appendGtGtGt(PrecedenceInfo info, String value) {

- appendStringToken(info, value);

- if (groupingStack.isEmpty) return;

- if (identical(groupingStack.head.kind, LT_TOKEN)) {

- groupingStack = groupingStack.tail;

- }

+ /**

+ * Appends a token for '>>'.

+ * This method does not issue unmatched errors, because >> is also the

+ * shift operator. It does not necessarily have to close a group.

+ */

+ void appendGtGt(PrecedenceInfo info) {

+ appendPrecedenceToken(info);

if (groupingStack.isEmpty) return;

if (identical(groupingStack.head.kind, LT_TOKEN)) {

+ // Don't assign endGroup: in "T<U<V>>", the '>>' token closes the outer

+ // '<', the inner '<' is left without endGroup.

groupingStack = groupingStack.tail;

}

if (groupingStack.isEmpty) return;

@@ -166,18 +188,27 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {

}

- void appendComment() {

+ void appendComment(start, bool asciiOnly) {

if (!includeComments) return;

- SourceString value = utf8String(tokenStart, -1);

- appendByteStringToken(COMMENT_INFO, value);

- }

+ appendSubstringToken(COMMENT_INFO, start, asciiOnly);

+ }

+ /**

+ * Discards any open '<' tokens from the top of the "grouping" stack

ngeoffray 2013/10/18 10:19:37 Replace 'We' by the actual callers.

lukas 2013/10/24 16:48:36 Done.

+ * (maintained by subclasses).

+ *

+ * [PartialParser.skipExpression] relies on the fact that we do not

+ * create groups for stuff like:

+ * [:a = b < c, d = e > f:].

+ *

+ * In other words, this method is called when the scanner recognizes

+ * something which cannot possibly be part of a type

+ * parameter/argument list.

+ */

void discardOpenLt() {

while (!groupingStack.isEmpty

&& identical(groupingStack.head.kind, LT_TOKEN)) {

groupingStack = groupingStack.tail;

}

- void unmatchedBeginGroup(BeginGroupToken begin);