Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(349)

Unified Diff: sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart

Issue 27510003: Scanner for UTF-8 byte arrays (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: fixes compiler tests Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart
diff --git a/sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart b/sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart
index 01f8e9e8159abd4b2dc3286e9eafff04d9858afc..5eb9129e86f11e8cfd69a843048d1135409a8425 100644
--- a/sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart
+++ b/sdk/lib/_internal/compiler/implementation/scanner/array_based_scanner.dart
@@ -2,35 +2,49 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
-part of scanner_implementation;
-
-abstract
-class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {
- int get charOffset => byteOffset + extraCharOffset;
- final Token tokens;
- Token tail;
- int tokenStart;
- int byteOffset;
- final bool includeComments;
-
- /** Since the input is UTF8, some characters are represented by more
- * than one byte. [extraCharOffset] tracks the difference. */
- int extraCharOffset;
+part of scanner;
+
+abstract class ArrayBasedScanner extends AbstractScanner {
+ ArrayBasedScanner(SourceFile file, bool includeComments)
+ : super(file, includeComments);
+
+ /**
+ * The stack of open groups, e.g. [: { ... ( .. :]
+ * Each BeginGroupToken has a pointer to the token where the group
+ * ends. This field is set when scanning the end group token.
+ */
Link<BeginGroupToken> groupingStack = const Link<BeginGroupToken>();
- ArrayBasedScanner(this.includeComments)
- : this.extraCharOffset = 0,
- this.tokenStart = -1,
- this.byteOffset = -1,
- this.tokens = new Token(EOF_INFO, -1) {
- this.tail = this.tokens;
+ /**
+ * Appends a token whose kind is determined by [info] and content is defined
+ * by the String [value].
+ *
+ * This method is invoked for class names, field names, method names, types,
+ * etc.
+ */
+ void appendStringToken(PrecedenceInfo info, String value) {
+ tail.next = new StringToken.fromString(info, value, tokenStart, true);
+ tail = tail.next;
}
- int advance() {
- int next = nextByte();
- return next;
+ /**
+ * Appends a fixed token whose kind and content are determined by [info].
+ * This is used to append *operator* tokens.
+ *
+ * Operator tokens represent operators like ':', '.', ';', '&&', '==', '--',
+ * '=>', etc.
+ */
+ void appendPrecedenceToken(PrecedenceInfo info) {
+ tail.next = new SymbolToken(info, tokenStart);
+ tail = tail.next;
}
+ /**
+ * Appends a fixed token based on whether the current char is [choice] or not.
+ * If the current char is [choice] a fixed token whose kind and content
+ * is determined by [yes] is appended, otherwise a fixed token whose kind
+ * and content is determined by [no] is appended.
+ */
int select(int choice, PrecedenceInfo yes, PrecedenceInfo no) {
int next = advance();
if (identical(next, choice)) {
@@ -42,27 +56,22 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {
}
}
- void appendPrecedenceToken(PrecedenceInfo info) {
- tail.next = new Token(info, tokenStart);
- tail = tail.next;
- }
-
- void appendStringToken(PrecedenceInfo info, String value) {
- tail.next = new StringToken(info, value, tokenStart);
- tail = tail.next;
- }
-
+ /**
+ * Appends a keyword token whose kind is determined by [keyword].
+ */
void appendKeywordToken(Keyword keyword) {
String syntax = keyword.syntax;
-
// Type parameters and arguments cannot contain 'this' or 'super'.
- if (identical(syntax, 'this') || identical(syntax, 'super')) discardOpenLt();
+ if (identical(syntax, 'this') || identical(syntax, 'super')) {
+ discardOpenLt();
+ }
tail.next = new KeywordToken(keyword, tokenStart);
tail = tail.next;
}
void appendEofToken() {
- tail.next = new Token(EOF_INFO, charOffset);
+ beginToken();
+ tail.next = new SymbolToken(EOF_INFO, tokenStart);
tail = tail.next;
// EOF points to itself so there's always infinite look-ahead.
tail.next = tail;
@@ -73,37 +82,54 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {
}
}
- void beginToken() {
- tokenStart = charOffset;
- }
-
- Token firstToken() {
- return tokens.next;
- }
-
- Token previousToken() {
- return tail;
- }
-
- void addToCharOffset(int offset) {
- extraCharOffset += offset;
+ /**
+ * Notifies that a whitespace character was scanned. Note that
+ * [appendWhiteSpace] is not always invoked for [$SPACE] characters.
+ *
+ * This method is used by the scanners to track line breaks and create the
+ * [lineStarts] map.
+ */
+ void appendWhiteSpace(int next) {
+ if (next == $LF && file != null) {
+ lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.
+ }
}
- void appendWhiteSpace(int next) {
- // Do nothing, we don't collect white space.
+ /**
+ * Notifies on [$LF] characters in multi-line comments or strings.
ngeoffray 2013/10/18 10:19:37 commends -> comments
lukas 2013/10/24 16:48:36 Done.
+ *
+ * This method is used by the scanners to track line breaks and create the
+ * [lineStarts] map.
+ */
+ void lineFeedInMultiline() {
+ if (file != null) {
+ lineStarts.add(stringOffset + 1);
+ }
}
- void appendBeginGroup(PrecedenceInfo info, String value) {
- Token token = new BeginGroupToken(info, value, tokenStart);
+ /**
+ * Appends a token that begins a new group, represented by [info].
+ * Group begin tokens are '{', '(', '[' and '${'.
+ */
+ void appendBeginGroup(PrecedenceInfo info) {
+ Token token = new BeginGroupToken(info, tokenStart);
tail.next = token;
tail = tail.next;
+
+ // { ( [ ${ cannot appear inside type parameters / arguments.
if (!identical(info.kind, LT_TOKEN)) discardOpenLt();
groupingStack = groupingStack.prepend(token);
}
- int appendEndGroup(PrecedenceInfo info, String value, int openKind) {
- assert(!identical(openKind, LT_TOKEN));
- appendStringToken(info, value);
+ /**
+ * Appends a token that ends a group, represented by [info].
ngeoffray 2013/10/18 10:19:37 a ends -> an end
lukas 2013/10/24 16:48:36 Done.
+ * It handles the group end tokens '}', ')' and ']'. The tokens '>' and
+ * '>>' are handled separately by [appendGt] and [appendGtGt].
+ */
+ int appendEndGroup(PrecedenceInfo info, int openKind) {
+ assert(!identical(openKind, LT_TOKEN)); // openKind is < for > and >>
+ appendPrecedenceToken(info);
+ // Don't report unmatched errors for <; it is also the less-than operator.
discardOpenLt();
if (groupingStack.isEmpty) {
return advance();
@@ -113,7 +139,8 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {
if (!identical(openKind, OPEN_CURLY_BRACKET_TOKEN) ||
!identical(begin.kind, STRING_INTERPOLATION_TOKEN)) {
// Not ending string interpolation.
- return error(new SourceString('Unmatched ${begin.stringValue}'));
+ unmatchedBeginGroup(begin);
+ return advance();
}
// We're ending an interpolated expression.
begin.endGroup = tail;
@@ -127,21 +154,13 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {
return advance();
}
- void appendGt(PrecedenceInfo info, String value) {
- appendStringToken(info, value);
- if (groupingStack.isEmpty) return;
- if (identical(groupingStack.head.kind, LT_TOKEN)) {
- groupingStack.head.endGroup = tail;
- groupingStack = groupingStack.tail;
- }
- }
-
- void appendGtGt(PrecedenceInfo info, String value) {
- appendStringToken(info, value);
- if (groupingStack.isEmpty) return;
- if (identical(groupingStack.head.kind, LT_TOKEN)) {
- groupingStack = groupingStack.tail;
- }
+ /**
+ * Appends a token for '>'.
+ * This method does not issue unmatched errors, because > is also the
+ * greater-than operator. It does not necessarily have to close a group.
+ */
+ void appendGt(PrecedenceInfo info) {
+ appendPrecedenceToken(info);
if (groupingStack.isEmpty) return;
if (identical(groupingStack.head.kind, LT_TOKEN)) {
groupingStack.head.endGroup = tail;
@@ -149,14 +168,17 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {
}
}
- void appendGtGtGt(PrecedenceInfo info, String value) {
- appendStringToken(info, value);
- if (groupingStack.isEmpty) return;
- if (identical(groupingStack.head.kind, LT_TOKEN)) {
- groupingStack = groupingStack.tail;
- }
+ /**
+ * Appends a token for '>>'.
+ * This method does not issue unmatched errors, because >> is also the
+ * shift operator. It does not necessarily have to close a group.
+ */
+ void appendGtGt(PrecedenceInfo info) {
+ appendPrecedenceToken(info);
if (groupingStack.isEmpty) return;
if (identical(groupingStack.head.kind, LT_TOKEN)) {
+ // Don't assign endGroup: in "T<U<V>>", the '>>' token closes the outer
+ // '<', the inner '<' is left without endGroup.
groupingStack = groupingStack.tail;
}
if (groupingStack.isEmpty) return;
@@ -166,18 +188,27 @@ class ArrayBasedScanner<S extends SourceString> extends AbstractScanner<S> {
}
}
- void appendComment() {
+ void appendComment(start, bool asciiOnly) {
if (!includeComments) return;
- SourceString value = utf8String(tokenStart, -1);
- appendByteStringToken(COMMENT_INFO, value);
- }
-
+ appendSubstringToken(COMMENT_INFO, start, asciiOnly);
+ }
+
+ /**
+ * We call this method to discard '<' from the "grouping" stack
ngeoffray 2013/10/18 10:19:37 Replace 'We' by the actual callers.
lukas 2013/10/24 16:48:36 Done.
+ * (maintained by subclasses).
+ *
+ * [PartialParser.skipExpression] relies on the fact that we do not
+ * create groups for stuff like:
+ * [:a = b < c, d = e > f:].
+ *
+ * In other words, this method is called when the scanner recognizes
+ * something which cannot possibly be part of a type
+ * parameter/argument list.
+ */
void discardOpenLt() {
while (!groupingStack.isEmpty
&& identical(groupingStack.head.kind, LT_TOKEN)) {
groupingStack = groupingStack.tail;
}
}
-
- void unmatchedBeginGroup(BeginGroupToken begin);
-}
+}

Powered by Google App Engine
This is Rietveld 408576698