Index: pkg/dart_scanner/lib/src/token.dart |
diff --git a/pkg/dart_scanner/lib/src/token.dart b/pkg/dart_scanner/lib/src/token.dart |
new file mode 100644 |
index 0000000000000000000000000000000000000000..af31142ec0a5fac70a2f932ec347218462da0513 |
--- /dev/null |
+++ b/pkg/dart_scanner/lib/src/token.dart |
@@ -0,0 +1,430 @@ |
+// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+library dart2js.tokens; |
+ |
+import 'dart:collection' show HashSet; |
+import 'dart:convert' show UTF8; |
+ |
+import '../common.dart'; |
+import '../util/util.dart' show computeHashCode; |
+import 'keyword.dart' show Keyword; |
+import 'precedence.dart' show PrecedenceInfo; |
+import 'precedence_constants.dart' as Precedence show BAD_INPUT_INFO; |
+import 'token_constants.dart' as Tokens show IDENTIFIER_TOKEN; |
+ |
+/** |
+ * A token that doubles as a linked list. |
+ */ |
+abstract class Token implements Spannable { |
+ /** |
+ * The character offset of the start of this token within the source text. |
+ */ |
+ final int charOffset; |
+ |
+ Token(this.charOffset); |
+ |
+ /** |
+ * The next token in the token stream. |
+ */ |
+ Token next; |
+ |
+ /** |
+ * The precedence info for this token. [info] determines the kind and the |
+ * precedence level of this token. |
+ * |
+ * Defined as getter to save a field in the [KeywordToken] subclass. |
+ */ |
+ PrecedenceInfo get info; |
+ |
+ /** |
+ * The string represented by this token, a substring of the source code. |
+ * |
+ * For [StringToken]s the [value] includes the quotes, explicit escapes, etc. |
+ */ |
+ String get value; |
+ |
+ /** |
+ * For symbol and keyword tokens, returns the string value represented by this |
+ * token. For [StringToken]s this method returns [:null:]. |
+ * |
+ * For [SymbolToken]s and [KeywordToken]s, the string value is a compile-time |
+ * constant originating in the [PrecedenceInfo] or in the [Keyword] instance. |
+ * This allows testing for keywords and symbols using [:identical:], e.g., |
+ * [:identical('class', token.value):]. |
+ * |
+ * Note that returning [:null:] for string tokens is important to identify |
+ * symbols and keywords, we cannot use [value] instead. The string literal |
+ * "$a($b" |
+ * produces ..., SymbolToken($), StringToken(a), StringToken((), ... |
+ * |
+ * After parsing the identifier 'a', the parser tests for a function |
+ * declaration using [:identical(next.stringValue, '('):], which (rightfully) |
+ * returns false because stringValue returns [:null:]. |
+ */ |
+ String get stringValue; |
+ |
+ /** |
+ * The kind enum of this token as determined by its [info]. |
+ */ |
+ int get kind => info.kind; |
+ |
+ /** |
+ * The precedence level for this token. |
+ */ |
+ int get precedence => info.precedence; |
+ |
+ /** |
+ * True if this token is an identifier. Some keywords allowed as identifiers, |
+ * see implementation in [KeywordToken]. |
+ */ |
+ bool isIdentifier(); |
+ |
+ /** |
+ * Returns a textual representation of this token to be used for debugging |
+ * purposes. The resulting string might contain information about the |
+ * structure of the token, for example 'StringToken(foo)' for the identifier |
+ * token 'foo'. |
+ * |
+ * Use [value] for the text actually parsed by the token. |
+ */ |
+ String toString(); |
+ |
+ /** |
+ * The number of characters parsed by this token. |
+ */ |
+ int get charCount { |
+ if (info == Precedence.BAD_INPUT_INFO) { |
+ // This is a token that wraps around an error message. Return 1 |
+ // instead of the size of the length of the error message. |
+ return 1; |
+ } else { |
+ return value.length; |
+ } |
+ } |
+ |
+ /// The character offset of the end of this token within the source text. |
+ int get charEnd => charOffset + charCount; |
+ |
+ int get hashCode => computeHashCode(charOffset, info, value); |
+} |
+ |
+/// A pair of tokens marking the beginning and the end of a span. Use for error |
+/// reporting. |
+class TokenPair implements Spannable { |
+ final Token begin; |
+ final Token end; |
+ |
+ TokenPair(this.begin, this.end); |
+} |
+ |
+/** |
+ * A [SymbolToken] represents the symbol in its precendence info. |
+ * Also used for end of file with EOF_INFO. |
+ */ |
+class SymbolToken extends Token { |
+ final PrecedenceInfo info; |
+ |
+ SymbolToken(this.info, int charOffset) : super(charOffset); |
+ |
+ String get value => info.value; |
+ |
+ String get stringValue => info.value; |
+ |
+ bool isIdentifier() => false; |
+ |
+ String toString() => "SymbolToken($value)"; |
+} |
+ |
+/** |
+ * A [BeginGroupToken] represents a symbol that may be the beginning of |
+ * a pair of brackets, i.e., ( { [ < or ${ |
+ * The [endGroup] token points to the matching closing bracked in case |
+ * it can be identified during scanning. |
+ */ |
+class BeginGroupToken extends SymbolToken { |
+ Token endGroup; |
+ |
+ BeginGroupToken(PrecedenceInfo info, int charOffset) |
+ : super(info, charOffset); |
+} |
+ |
+/** |
+ * A keyword token. |
+ */ |
+class KeywordToken extends Token { |
+ final Keyword keyword; |
+ |
+ KeywordToken(this.keyword, int charOffset) : super(charOffset); |
+ |
+ PrecedenceInfo get info => keyword.info; |
+ |
+ String get value => keyword.syntax; |
+ |
+ String get stringValue => keyword.syntax; |
+ |
+ bool isIdentifier() => keyword.isPseudo || keyword.isBuiltIn; |
+ |
+ String toString() => "KeywordToken($value)"; |
+} |
+ |
+abstract class ErrorToken extends Token { |
+ ErrorToken(int charOffset) : super(charOffset); |
+ |
+ PrecedenceInfo get info => Precedence.BAD_INPUT_INFO; |
+ |
+ String get value { |
+ throw new SpannableAssertionFailure(this, assertionMessage); |
+ } |
+ |
+ String get stringValue => null; |
+ |
+ bool isIdentifier() => false; |
+ |
+ String get assertionMessage; |
+} |
+ |
+class BadInputToken extends ErrorToken { |
+ final int character; |
+ |
+ BadInputToken(this.character, int charOffset) : super(charOffset); |
+ |
+ String toString() => "BadInputToken($character)"; |
+ |
+ String get assertionMessage { |
+ return 'Character U+${character.toRadixString(16)} not allowed here.'; |
+ } |
+} |
+ |
+class UnterminatedToken extends ErrorToken { |
+ final String start; |
+ final int endOffset; |
+ |
+ UnterminatedToken(this.start, int charOffset, this.endOffset) |
+ : super(charOffset); |
+ |
+ String toString() => "UnterminatedToken($start)"; |
+ |
+ String get assertionMessage => "'$start' isn't terminated."; |
+ |
+ int get charCount => endOffset - charOffset; |
+} |
+ |
+class UnmatchedToken extends ErrorToken { |
+ final BeginGroupToken begin; |
+ |
+ UnmatchedToken(BeginGroupToken begin) |
+ : this.begin = begin, |
+ super(begin.charOffset); |
+ |
+ String toString() => "UnmatchedToken(${begin.value})"; |
+ |
+ String get assertionMessage => "'$begin' isn't closed."; |
+} |
+ |
+/** |
+ * A String-valued token. Represents identifiers, string literals, |
+ * number literals, comments, and error tokens, using the corresponding |
+ * precedence info. |
+ */ |
+class StringToken extends Token { |
+ /** |
+ * The length threshold above which substring tokens are computed lazily. |
+ * |
+ * For string tokens that are substrings of the program source, the actual |
+ * substring extraction is performed lazily. This is beneficial because |
+ * not all scanned code is actually used. For unused parts, the substrings |
+ * are never computed and allocated. |
+ */ |
+ static const int LAZY_THRESHOLD = 4; |
+ |
+ var /* String | LazySubtring */ valueOrLazySubstring; |
+ |
+ final PrecedenceInfo info; |
+ |
+ /** |
+ * Creates a non-lazy string token. If [canonicalize] is true, the string |
+ * is canonicalized before the token is created. |
+ */ |
+ StringToken.fromString(this.info, String value, int charOffset, |
+ {bool canonicalize: false}) |
+ : valueOrLazySubstring = canonicalizedString(value, canonicalize), |
+ super(charOffset); |
+ |
+ /** |
+ * Creates a lazy string token. If [canonicalize] is true, the string |
+ * is canonicalized before the token is created. |
+ */ |
+ StringToken.fromSubstring( |
+ this.info, String data, int start, int end, int charOffset, |
+ {bool canonicalize: false}) |
+ : super(charOffset) { |
+ int length = end - start; |
+ if (length <= LAZY_THRESHOLD) { |
+ valueOrLazySubstring = |
+ canonicalizedString(data.substring(start, end), canonicalize); |
+ } else { |
+ valueOrLazySubstring = |
+ new LazySubstring(data, start, length, canonicalize); |
+ } |
+ } |
+ |
+ /** |
+ * Creates a lazy string token. If [asciiOnly] is false, the byte array |
+ * is passed through a UTF-8 decoder. |
+ */ |
+ StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end, |
+ bool asciiOnly, int charOffset) |
+ : super(charOffset) { |
+ int length = end - start; |
+ if (length <= LAZY_THRESHOLD) { |
+ valueOrLazySubstring = decodeUtf8(data, start, end, asciiOnly); |
+ } else { |
+ valueOrLazySubstring = new LazySubstring(data, start, length, asciiOnly); |
+ } |
+ } |
+ |
+ String get value { |
+ if (valueOrLazySubstring is String) { |
+ return valueOrLazySubstring; |
+ } else { |
+ assert(valueOrLazySubstring is LazySubstring); |
+ var data = valueOrLazySubstring.data; |
+ int start = valueOrLazySubstring.start; |
+ int end = start + valueOrLazySubstring.length; |
+ if (data is String) { |
+ valueOrLazySubstring = canonicalizedString( |
+ data.substring(start, end), valueOrLazySubstring.boolValue); |
+ } else { |
+ valueOrLazySubstring = |
+ decodeUtf8(data, start, end, valueOrLazySubstring.boolValue); |
+ } |
+ return valueOrLazySubstring; |
+ } |
+ } |
+ |
+ /// See [Token.stringValue] for an explanation. |
+ String get stringValue => null; |
+ |
+ bool isIdentifier() => identical(kind, Tokens.IDENTIFIER_TOKEN); |
+ |
+ String toString() => "StringToken($value)"; |
+ |
+ static final HashSet<String> canonicalizedSubstrings = new HashSet<String>(); |
+ |
+ static String canonicalizedString(String s, bool canonicalize) { |
+ if (!canonicalize) return s; |
+ var result = canonicalizedSubstrings.lookup(s); |
+ if (result != null) return result; |
+ canonicalizedSubstrings.add(s); |
+ return s; |
+ } |
+ |
+ static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) { |
+ var s; |
+ if (asciiOnly) { |
+ s = new String.fromCharCodes(data, start, end); |
+ } else { |
+ s = UTF8.decoder.convert(data, start, end); |
+ } |
+ return canonicalizedString(s, true); |
+ } |
+} |
+ |
+/** |
+ * This class represents the necessary information to compute a substring |
+ * lazily. The substring can either originate from a string or from |
+ * a [:List<int>:] of UTF-8 bytes. |
+ */ |
+abstract class LazySubstring { |
+ /** The original data, either a string or a List<int> */ |
+ get data; |
+ |
+ int get start; |
+ int get length; |
+ |
+ /** |
+ * If this substring is based on a String, the [boolValue] indicates wheter |
+ * the resulting substring should be canonicalized. |
+ * |
+ * For substrings based on a byte array, the [boolValue] is true if the |
+ * array only holds ASCII characters. The resulting substring will be |
+ * canonicalized after decoding. |
+ */ |
+ bool get boolValue; |
+ |
+ LazySubstring.internal(); |
+ |
+ factory LazySubstring(data, int start, int length, bool b) { |
+ // See comment on [CompactLazySubstring]. |
+ if (start < 0x100000 && length < 0x200) { |
+ int fields = (start << 9); |
+ fields = fields | length; |
+ fields = fields << 1; |
+ if (b) fields |= 1; |
+ return new CompactLazySubstring(data, fields); |
+ } else { |
+ return new FullLazySubstring(data, start, length, b); |
+ } |
+ } |
+} |
+ |
+/** |
+ * This class encodes [start], [length] and [boolValue] in a single |
+ * 30 bit integer. It uses 20 bits for [start], which covers source files |
+ * of 1MB. [length] has 9 bits, which covers 512 characters. |
+ * |
+ * The file html_dart2js.dart is currently around 1MB. |
+ */ |
+class CompactLazySubstring extends LazySubstring { |
+ final data; |
+ final int fields; |
+ |
+ CompactLazySubstring(this.data, this.fields) : super.internal(); |
+ |
+ int get start => fields >> 10; |
+ int get length => (fields >> 1) & 0x1ff; |
+ bool get boolValue => (fields & 1) == 1; |
+} |
+ |
+class FullLazySubstring extends LazySubstring { |
+ final data; |
+ final int start; |
+ final int length; |
+ final bool boolValue; |
+ FullLazySubstring(this.data, this.start, this.length, this.boolValue) |
+ : super.internal(); |
+} |
+ |
+bool isUserDefinableOperator(String value) { |
+ return isBinaryOperator(value) || |
+ isMinusOperator(value) || |
+ isTernaryOperator(value) || |
+ isUnaryOperator(value); |
+} |
+ |
+bool isUnaryOperator(String value) => value == '~'; |
+ |
+bool isBinaryOperator(String value) { |
+ return value == '==' || |
+ value == '[]' || |
+ value == '*' || |
+ value == '/' || |
+ value == '%' || |
+ value == '~/' || |
+ value == '+' || |
+ value == '<<' || |
+ value == '>>' || |
+ value == '>=' || |
+ value == '>' || |
+ value == '<=' || |
+ value == '<' || |
+ value == '&' || |
+ value == '^' || |
+ value == '|'; |
+} |
+ |
+bool isTernaryOperator(String value) => value == '[]='; |
+ |
+bool isMinusOperator(String value) => value == '-'; |