Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(37)

Unified Diff: pkg/dart_scanner/lib/src/token.dart

Issue 2621153006: Copy scanner and parser to own packages. (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « pkg/dart_scanner/lib/src/precedence.dart ('k') | pkg/dart_scanner/lib/src/token_constants.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: pkg/dart_scanner/lib/src/token.dart
diff --git a/pkg/dart_scanner/lib/src/token.dart b/pkg/dart_scanner/lib/src/token.dart
new file mode 100644
index 0000000000000000000000000000000000000000..af31142ec0a5fac70a2f932ec347218462da0513
--- /dev/null
+++ b/pkg/dart_scanner/lib/src/token.dart
@@ -0,0 +1,430 @@
+// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library dart2js.tokens;
+
+import 'dart:collection' show HashSet;
+import 'dart:convert' show UTF8;
+
+import '../common.dart';
+import '../util/util.dart' show computeHashCode;
+import 'keyword.dart' show Keyword;
+import 'precedence.dart' show PrecedenceInfo;
+import 'precedence_constants.dart' as Precedence show BAD_INPUT_INFO;
+import 'token_constants.dart' as Tokens show IDENTIFIER_TOKEN;
+
+/// A scanned token that is also a node of a singly linked list of tokens
+/// (see [next]).
+abstract class Token implements Spannable {
+  /// The character offset of the start of this token within the source text.
+  final int charOffset;
+
+  /// The next token in the token stream.
+  Token next;
+
+  Token(this.charOffset);
+
+  /// The precedence info for this token; it determines both the kind and the
+  /// precedence level of the token.
+  ///
+  /// Declared as a getter to save a field in the [KeywordToken] subclass.
+  PrecedenceInfo get info;
+
+  /// The string represented by this token, a substring of the source code.
+  ///
+  /// For [StringToken]s the [value] includes the quotes, explicit escapes,
+  /// etc.
+  String get value;
+
+  /// The string value of a symbol or keyword token, or `null` for a
+  /// [StringToken].
+  ///
+  /// For [SymbolToken]s and [KeywordToken]s the string is a compile-time
+  /// constant originating in the [PrecedenceInfo] or in the [Keyword]
+  /// instance. This allows testing for keywords and symbols using
+  /// `identical`, e.g. `identical('class', token.value)`.
+  ///
+  /// Returning `null` for string tokens is what identifies symbols and
+  /// keywords; [value] cannot be used for that. The string literal
+  ///
+  ///     "$a($b"
+  ///
+  /// produces ..., SymbolToken($), StringToken(a), StringToken((), ...
+  ///
+  /// After parsing the identifier 'a', the parser tests for a function
+  /// declaration using `identical(next.stringValue, '(')`, which (rightfully)
+  /// returns false because [stringValue] returns `null`.
+  String get stringValue;
+
+  /// The kind enum of this token, as determined by its [info].
+  int get kind {
+    return info.kind;
+  }
+
+  /// The precedence level of this token, as determined by its [info].
+  int get precedence {
+    return info.precedence;
+  }
+
+  /// Whether this token is an identifier.
+  ///
+  /// Some keywords are allowed as identifiers; see the implementation in
+  /// [KeywordToken].
+  bool isIdentifier();
+
+  /// Returns a textual representation of this token for debugging purposes.
+  ///
+  /// The result may describe the structure of the token, for example
+  /// 'StringToken(foo)' for the identifier token 'foo'. Use [value] for the
+  /// text actually parsed by the token.
+  String toString();
+
+  /// The number of characters parsed by this token.
+  int get charCount {
+    // A token with BAD_INPUT_INFO wraps around an error message; count it as
+    // one character instead of the length of that message.
+    return info == Precedence.BAD_INPUT_INFO ? 1 : value.length;
+  }
+
+  /// The character offset of the end of this token within the source text.
+  int get charEnd {
+    return charOffset + charCount;
+  }
+
+  /// A hash computed from [charOffset], [info] and [value].
+  int get hashCode {
+    return computeHashCode(charOffset, info, value);
+  }
+}
+
+/// Two tokens delimiting a span: [begin] marks its start and [end] marks its
+/// end.
+///
+/// Used for error reporting.
+class TokenPair implements Spannable {
+  final Token begin, end;
+
+  TokenPair(this.begin, this.end);
+}
+
+/// A token representing the symbol in its precedence [info].
+///
+/// Also used for end of file, with EOF_INFO.
+class SymbolToken extends Token {
+  final PrecedenceInfo info;
+
+  SymbolToken(this.info, int charOffset) : super(charOffset);
+
+  /// Always false for symbol tokens.
+  bool isIdentifier() {
+    return false;
+  }
+
+  /// The symbol text, taken from [info].
+  String get value {
+    return info.value;
+  }
+
+  /// The same text as [value]; see [Token.stringValue].
+  String get stringValue {
+    return info.value;
+  }
+
+  String toString() {
+    return 'SymbolToken($value)';
+  }
+}
+
+/// A [SymbolToken] for a symbol that may be the beginning of a pair of
+/// brackets, i.e. one of `(`, `{`, `[`, `<` or `${`.
+class BeginGroupToken extends SymbolToken {
+  BeginGroupToken(PrecedenceInfo info, int charOffset)
+      : super(info, charOffset);
+
+  /// The matching closing bracket, in case it can be identified during
+  /// scanning.
+  Token endGroup;
+}
+
+/// A token for a [Keyword].
+class KeywordToken extends Token {
+  final Keyword keyword;
+
+  KeywordToken(this.keyword, int charOffset) : super(charOffset);
+
+  /// The precedence info stored on [keyword].
+  PrecedenceInfo get info {
+    return keyword.info;
+  }
+
+  /// The syntax of [keyword].
+  String get value {
+    return keyword.syntax;
+  }
+
+  /// The same text as [value]; see [Token.stringValue].
+  String get stringValue {
+    return keyword.syntax;
+  }
+
+  /// Whether [keyword] is a pseudo keyword or a built-in identifier.
+  bool isIdentifier() {
+    return keyword.isPseudo || keyword.isBuiltIn;
+  }
+
+  String toString() {
+    return 'KeywordToken($value)';
+  }
+}
+
+/// Base class for tokens that represent a scanning error.
+///
+/// Subclasses describe the problem through [assertionMessage].
+abstract class ErrorToken extends Token {
+  ErrorToken(int charOffset) : super(charOffset);
+
+  /// Always [Precedence.BAD_INPUT_INFO].
+  PrecedenceInfo get info => Precedence.BAD_INPUT_INFO;
+
+  /// Always throws a [SpannableAssertionFailure] carrying
+  /// [assertionMessage].
+  String get value {
+    throw new SpannableAssertionFailure(this, assertionMessage);
+  }
+
+  /// Always `null`; see [Token.stringValue].
+  String get stringValue => null;
+
+  bool isIdentifier() => false;
+
+  /// The message carried by the failure that [value] throws.
+  String get assertionMessage;
+
+  /// A hash computed from [charOffset], [info] and [assertionMessage].
+  ///
+  /// The inherited [Token.hashCode] reads [value], which throws for every
+  /// error token, so without this override an [ErrorToken] cannot be hashed.
+  int get hashCode => computeHashCode(charOffset, info, assertionMessage);
+}
+
+/// An [ErrorToken] for a [character] that is not allowed where it appears.
+class BadInputToken extends ErrorToken {
+  /// The rejected character; [assertionMessage] prints it in hexadecimal.
+  final int character;
+
+  BadInputToken(this.character, int charOffset) : super(charOffset);
+
+  String get assertionMessage =>
+      'Character U+${character.toRadixString(16)} not allowed here.';
+
+  String toString() {
+    return 'BadInputToken($character)';
+  }
+}
+
+/// An [ErrorToken] for something that begins with [start] but isn't
+/// terminated.
+class UnterminatedToken extends ErrorToken {
+  /// The text quoted in [assertionMessage].
+  final String start;
+
+  /// The offset used as the end of this token when computing [charCount].
+  final int endOffset;
+
+  UnterminatedToken(this.start, int charOffset, this.endOffset)
+      : super(charOffset);
+
+  /// The distance from [charOffset] to [endOffset], replacing the fixed count
+  /// of 1 that [Token.charCount] uses for error tokens.
+  int get charCount {
+    return endOffset - charOffset;
+  }
+
+  String get assertionMessage {
+    return "'$start' isn't terminated.";
+  }
+
+  String toString() {
+    return 'UnterminatedToken($start)';
+  }
+}
+
+/// An [ErrorToken] for an opening bracket, [begin], that isn't closed.
+///
+/// The token is placed at the character offset of [begin].
+class UnmatchedToken extends ErrorToken {
+  /// The opening bracket that has no matching closing bracket.
+  final BeginGroupToken begin;
+
+  UnmatchedToken(BeginGroupToken begin)
+      : this.begin = begin,
+        super(begin.charOffset);
+
+  String toString() => "UnmatchedToken(${begin.value})";
+
+  // Interpolate the bracket text itself, as [toString] does. Interpolating
+  // `$begin` goes through [SymbolToken.toString] and produces
+  // "'SymbolToken(()' isn't closed." instead of "'(' isn't closed.".
+  String get assertionMessage => "'${begin.value}' isn't closed.";
+}
+
+/**
+ * A String-valued token. Represents identifiers, string literals,
+ * number literals, comments, and error tokens, using the corresponding
+ * precedence info.
+ *
+ * The text is held in [valueOrLazySubstring], either as the string itself or
+ * as a [LazySubstring] that [value] turns into a string on first read.
+ */
+class StringToken extends Token {
+ /**
+ * The length threshold above which substring tokens are computed lazily.
+ *
+ * For string tokens that are substrings of the program source, the actual
+ * substring extraction is performed lazily. This is beneficial because
+ * not all scanned code is actually used. For unused parts, the substrings
+ * are never computed and allocated.
+ */
+ static const int LAZY_THRESHOLD = 4;
+
+ /**
+ * Either the token text, or a [LazySubstring] describing how to extract it.
+ * [value] replaces a [LazySubstring] with the extracted text on first read.
+ */
+ var /* String | LazySubstring */ valueOrLazySubstring;
+
+ final PrecedenceInfo info;
+
+ /**
+ * Creates a non-lazy string token. If [canonicalize] is true, the string
+ * is canonicalized before the token is created.
+ */
+ StringToken.fromString(this.info, String value, int charOffset,
+ {bool canonicalize: false})
+ : valueOrLazySubstring = canonicalizedString(value, canonicalize),
+ super(charOffset);
+
+ /**
+ * Creates a string token for the substring of [data] from [start] to [end].
+ *
+ * Substrings of at most [LAZY_THRESHOLD] characters are extracted right
+ * away; longer ones are extracted on the first read of [value]. If
+ * [canonicalize] is true, the string is canonicalized when it is extracted.
+ */
+ StringToken.fromSubstring(
+ this.info, String data, int start, int end, int charOffset,
+ {bool canonicalize: false})
+ : super(charOffset) {
+ int length = end - start;
+ if (length <= LAZY_THRESHOLD) {
+ valueOrLazySubstring =
+ canonicalizedString(data.substring(start, end), canonicalize);
+ } else {
+ valueOrLazySubstring =
+ new LazySubstring(data, start, length, canonicalize);
+ }
+ }
+
+ /**
+ * Creates a string token for the bytes of [data] from [start] to [end].
+ *
+ * Ranges of at most [LAZY_THRESHOLD] bytes are decoded right away; longer
+ * ones are decoded on the first read of [value]. If [asciiOnly] is false,
+ * the byte array is passed through a UTF-8 decoder. The decoded string is
+ * always canonicalized (see [decodeUtf8]).
+ */
+ StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end,
+ bool asciiOnly, int charOffset)
+ : super(charOffset) {
+ int length = end - start;
+ if (length <= LAZY_THRESHOLD) {
+ valueOrLazySubstring = decodeUtf8(data, start, end, asciiOnly);
+ } else {
+ valueOrLazySubstring = new LazySubstring(data, start, length, asciiOnly);
+ }
+ }
+
+ /**
+ * The text of this token.
+ *
+ * If the text has not been extracted yet, it is extracted from the
+ * [LazySubstring] and stored back into [valueOrLazySubstring], so later
+ * reads return the stored string.
+ */
+ String get value {
+ if (valueOrLazySubstring is String) {
+ return valueOrLazySubstring;
+ } else {
+ assert(valueOrLazySubstring is LazySubstring);
+ var data = valueOrLazySubstring.data;
+ int start = valueOrLazySubstring.start;
+ int end = start + valueOrLazySubstring.length;
+ // In both branches `boolValue` is read from the LazySubstring as an
+ // argument, before the field is overwritten with the resulting string.
+ if (data is String) {
+ // String source: `boolValue` is the canonicalize flag.
+ valueOrLazySubstring = canonicalizedString(
+ data.substring(start, end), valueOrLazySubstring.boolValue);
+ } else {
+ // Byte source: `boolValue` is the asciiOnly flag.
+ valueOrLazySubstring =
+ decodeUtf8(data, start, end, valueOrLazySubstring.boolValue);
+ }
+ return valueOrLazySubstring;
+ }
+ }
+
+ /// See [Token.stringValue] for an explanation.
+ String get stringValue => null;
+
+ /// Whether the [kind] of this token is [Tokens.IDENTIFIER_TOKEN].
+ bool isIdentifier() => identical(kind, Tokens.IDENTIFIER_TOKEN);
+
+ String toString() => "StringToken($value)";
+
+ /// The strings handed out so far by [canonicalizedString].
+ static final HashSet<String> canonicalizedSubstrings = new HashSet<String>();
+
+ /**
+ * Returns [s] unchanged if [canonicalize] is false. Otherwise returns the
+ * string equal to [s] already stored in [canonicalizedSubstrings], or
+ * stores [s] there and returns it if there is none.
+ */
+ static String canonicalizedString(String s, bool canonicalize) {
+ if (!canonicalize) return s;
+ var result = canonicalizedSubstrings.lookup(s);
+ if (result != null) return result;
+ canonicalizedSubstrings.add(s);
+ return s;
+ }
+
+ /**
+ * Converts the bytes of [data] from [start] to [end] to a string and
+ * canonicalizes the result.
+ *
+ * If [asciiOnly] is true the bytes are used directly as char codes;
+ * otherwise they are passed through the UTF-8 decoder.
+ */
+ static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) {
+ var s;
+ if (asciiOnly) {
+ s = new String.fromCharCodes(data, start, end);
+ } else {
+ s = UTF8.decoder.convert(data, start, end);
+ }
+ return canonicalizedString(s, true);
+ }
+}
+
+/// The information necessary to compute a substring lazily.
+///
+/// The substring can originate either from a string or from a `List<int>` of
+/// UTF-8 bytes.
+abstract class LazySubstring {
+  LazySubstring.internal();
+
+  /// Creates a [CompactLazySubstring] when [start] and [length] fit its bit
+  /// layout, and a [FullLazySubstring] otherwise.
+  factory LazySubstring(data, int start, int length, bool b) {
+    if (start >= 0x100000 || length >= 0x200) {
+      return new FullLazySubstring(data, start, length, b);
+    }
+    // See comment on [CompactLazySubstring].
+    int packed = ((start << 9) | length) << 1;
+    if (b) packed |= 1;
+    return new CompactLazySubstring(data, packed);
+  }
+
+  /// The original data, either a string or a `List<int>`.
+  get data;
+
+  int get start;
+  int get length;
+
+  /// A flag whose meaning depends on the kind of [data].
+  ///
+  /// If this substring is based on a String, it indicates whether the
+  /// resulting substring should be canonicalized.
+  ///
+  /// For substrings based on a byte array, it is true if the array only
+  /// holds ASCII characters. The resulting substring will be canonicalized
+  /// after decoding.
+  bool get boolValue;
+}
+
+/// A [LazySubstring] that encodes [start], [length] and [boolValue] in a
+/// single 30 bit integer, [fields].
+///
+/// The lowest bit holds [boolValue]. The 9 bits above it hold [length],
+/// which covers lengths below 512. The next 20 bits hold [start], which
+/// covers source files of 1MB.
+///
+/// The file html_dart2js.dart is currently around 1MB.
+class CompactLazySubstring extends LazySubstring {
+  final data;
+  final int fields;
+
+  CompactLazySubstring(this.data, this.fields) : super.internal();
+
+  bool get boolValue {
+    return (fields & 0x1) != 0;
+  }
+
+  int get length {
+    return (fields >> 1) & 0x1ff;
+  }
+
+  int get start {
+    return fields >> 10;
+  }
+}
+
+/// A [LazySubstring] that keeps [start], [length] and [boolValue] in separate
+/// fields.
+class FullLazySubstring extends LazySubstring {
+  final data;
+  final int start, length;
+  final bool boolValue;
+
+  FullLazySubstring(this.data, this.start, this.length, this.boolValue)
+      : super.internal();
+}
+
+/// Whether [value] is accepted by [isBinaryOperator], [isMinusOperator],
+/// [isTernaryOperator] or [isUnaryOperator].
+bool isUserDefinableOperator(String value) {
+  if (isBinaryOperator(value)) return true;
+  if (isMinusOperator(value)) return true;
+  if (isTernaryOperator(value)) return true;
+  return isUnaryOperator(value);
+}
+
+/// Whether [value] is `~`.
+bool isUnaryOperator(String value) {
+  return value == '~';
+}
+
+/// Whether [value] is one of the operators `==`, `[]`, `*`, `/`, `%`, `~/`,
+/// `+`, `<<`, `>>`, `>=`, `>`, `<=`, `<`, `&`, `^` or `|`.
+bool isBinaryOperator(String value) {
+  const List<String> binaryOperators = const <String>[
+    '==',
+    '[]',
+    '*',
+    '/',
+    '%',
+    '~/',
+    '+',
+    '<<',
+    '>>',
+    '>=',
+    '>',
+    '<=',
+    '<',
+    '&',
+    '^',
+    '|'
+  ];
+  return binaryOperators.contains(value);
+}
+
+/// Whether [value] is `[]=`.
+bool isTernaryOperator(String value) {
+  return value == '[]=';
+}
+
+/// Whether [value] is `-`.
+bool isMinusOperator(String value) {
+  return value == '-';
+}
« no previous file with comments | « pkg/dart_scanner/lib/src/precedence.dart ('k') | pkg/dart_scanner/lib/src/token_constants.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698