pkg/dart_scanner/lib/src/token.dart - Issue 2621153006: Copy scanner and parser to own packages.

Unified Diff: pkg/dart_scanner/lib/src/token.dart

Issue 2621153006: Copy scanner and parser to own packages. (Closed)

Patch Set: Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: pkg/dart_scanner/lib/src/token.dart

diff --git a/pkg/dart_scanner/lib/src/token.dart b/pkg/dart_scanner/lib/src/token.dart

new file mode 100644

index 0000000000000000000000000000000000000000..af31142ec0a5fac70a2f932ec347218462da0513

--- /dev/null

+++ b/pkg/dart_scanner/lib/src/token.dart

@@ -0,0 +1,430 @@

+// BSD-style license that can be found in the LICENSE file.

+library dart2js.tokens;

+import 'dart:collection' show HashSet;

+import 'dart:convert' show UTF8;

+import '../common.dart';

+import '../util/util.dart' show computeHashCode;

+import 'keyword.dart' show Keyword;

+import 'precedence.dart' show PrecedenceInfo;

+import 'precedence_constants.dart' as Precedence show BAD_INPUT_INFO;

+import 'token_constants.dart' as Tokens show IDENTIFIER_TOKEN;

+/**

+ * A token that doubles as a linked list.

+ */

+abstract class Token implements Spannable {

+ /**

+ * The character offset of the start of this token within the source text.

+ */

+ final int charOffset;

+ Token(this.charOffset);

+ /**

+ * The next token in the token stream.

+ */

+ Token next;

+ /**

+ * The precedence info for this token. [info] determines the kind and the

+ * precedence level of this token.

+ *

+ * Defined as getter to save a field in the [KeywordToken] subclass.

+ */

+ PrecedenceInfo get info;

+ /**

+ * The string represented by this token, a substring of the source code.

+ *

+ * For [StringToken]s the [value] includes the quotes, explicit escapes, etc.

+ */

+ String get value;

+ /**

+ * For symbol and keyword tokens, returns the string value represented by this

+ * token. For [StringToken]s this method returns [:null:].

+ *

+ * For [SymbolToken]s and [KeywordToken]s, the string value is a compile-time

+ * constant originating in the [PrecedenceInfo] or in the [Keyword] instance.

+ * This allows testing for keywords and symbols using [:identical:], e.g.,

+ * [:identical('class', token.value):].

+ *

+ * Note that returning [:null:] for string tokens is important to identify

+ * symbols and keywords, we cannot use [value] instead. The string literal

+ * "$a($b"

+ * produces ..., SymbolToken($), StringToken(a), StringToken((), ...

+ *

+ * After parsing the identifier 'a', the parser tests for a function

+ * declaration using [:identical(next.stringValue, '('):], which (rightfully)

+ * returns false because stringValue returns [:null:].

+ */

+ String get stringValue;

+ /**

+ * The kind enum of this token as determined by its [info].

+ */

+ int get kind => info.kind;

+ /**

+ * The precedence level for this token.

+ */

+ int get precedence => info.precedence;

+ /**

+ * True if this token is an identifier. Some keywords allowed as identifiers,

+ * see implementation in [KeywordToken].

+ */

+ bool isIdentifier();

+ /**

+ * Returns a textual representation of this token to be used for debugging

+ * purposes. The resulting string might contain information about the

+ * structure of the token, for example 'StringToken(foo)' for the identifier

+ * token 'foo'.

+ *

+ * Use [value] for the text actually parsed by the token.

+ */

+ String toString();

+ /**

+ * The number of characters parsed by this token.

+ */

+ int get charCount {

+ if (info == Precedence.BAD_INPUT_INFO) {

+ // This is a token that wraps around an error message. Return 1

+ // instead of the size of the length of the error message.

+ return 1;

+ } else {

+ return value.length;

+ }

+ /// The character offset of the end of this token within the source text.

+ int get charEnd => charOffset + charCount;

+ int get hashCode => computeHashCode(charOffset, info, value);

+/// A pair of tokens marking the beginning and the end of a span. Use for error

+/// reporting.

+class TokenPair implements Spannable {

+ final Token begin;

+ final Token end;

+ TokenPair(this.begin, this.end);

+/**

+ * A [SymbolToken] represents the symbol in its precendence info.

+ * Also used for end of file with EOF_INFO.

+ */

+class SymbolToken extends Token {

+ final PrecedenceInfo info;

+ SymbolToken(this.info, int charOffset) : super(charOffset);

+ String get value => info.value;

+ String get stringValue => info.value;

+ bool isIdentifier() => false;

+ String toString() => "SymbolToken($value)";

+/**

+ * A [BeginGroupToken] represents a symbol that may be the beginning of

+ * a pair of brackets, i.e., ( { [ < or ${

+ * The [endGroup] token points to the matching closing bracked in case

+ * it can be identified during scanning.

+ */

+class BeginGroupToken extends SymbolToken {

+ Token endGroup;

+ BeginGroupToken(PrecedenceInfo info, int charOffset)

+ : super(info, charOffset);

+/**

+ * A keyword token.

+ */

+class KeywordToken extends Token {

+ final Keyword keyword;

+ KeywordToken(this.keyword, int charOffset) : super(charOffset);

+ PrecedenceInfo get info => keyword.info;

+ String get value => keyword.syntax;

+ String get stringValue => keyword.syntax;

+ bool isIdentifier() => keyword.isPseudo || keyword.isBuiltIn;

+ String toString() => "KeywordToken($value)";

+abstract class ErrorToken extends Token {

+ ErrorToken(int charOffset) : super(charOffset);

+ PrecedenceInfo get info => Precedence.BAD_INPUT_INFO;

+ String get value {

+ throw new SpannableAssertionFailure(this, assertionMessage);

+ }

+ String get stringValue => null;

+ bool isIdentifier() => false;

+ String get assertionMessage;

+class BadInputToken extends ErrorToken {

+ final int character;

+ BadInputToken(this.character, int charOffset) : super(charOffset);

+ String toString() => "BadInputToken($character)";

+ String get assertionMessage {

+ return 'Character U+${character.toRadixString(16)} not allowed here.';

+ }

+class UnterminatedToken extends ErrorToken {

+ final String start;

+ final int endOffset;

+ UnterminatedToken(this.start, int charOffset, this.endOffset)

+ : super(charOffset);

+ String toString() => "UnterminatedToken($start)";

+ String get assertionMessage => "'$start' isn't terminated.";

+ int get charCount => endOffset - charOffset;

+class UnmatchedToken extends ErrorToken {

+ final BeginGroupToken begin;

+ UnmatchedToken(BeginGroupToken begin)

+ : this.begin = begin,

+ super(begin.charOffset);

+ String toString() => "UnmatchedToken(${begin.value})";

+ String get assertionMessage => "'$begin' isn't closed.";

+/**

+ * A String-valued token. Represents identifiers, string literals,

+ * number literals, comments, and error tokens, using the corresponding

+ * precedence info.

+ */

+class StringToken extends Token {

+ /**

+ * The length threshold above which substring tokens are computed lazily.

+ *

+ * For string tokens that are substrings of the program source, the actual

+ * substring extraction is performed lazily. This is beneficial because

+ * not all scanned code is actually used. For unused parts, the substrings

+ * are never computed and allocated.

+ */

+ static const int LAZY_THRESHOLD = 4;

+ var /* String | LazySubtring */ valueOrLazySubstring;

+ final PrecedenceInfo info;

+ /**

+ * Creates a non-lazy string token. If [canonicalize] is true, the string

+ * is canonicalized before the token is created.

+ */

+ StringToken.fromString(this.info, String value, int charOffset,

+ {bool canonicalize: false})

+ : valueOrLazySubstring = canonicalizedString(value, canonicalize),

+ super(charOffset);

+ /**

+ * Creates a lazy string token. If [canonicalize] is true, the string

+ * is canonicalized before the token is created.

+ */

+ StringToken.fromSubstring(

+ this.info, String data, int start, int end, int charOffset,

+ {bool canonicalize: false})

+ : super(charOffset) {

+ int length = end - start;

+ if (length <= LAZY_THRESHOLD) {

+ valueOrLazySubstring =

+ canonicalizedString(data.substring(start, end), canonicalize);

+ } else {

+ valueOrLazySubstring =

+ new LazySubstring(data, start, length, canonicalize);

+ }

+ /**

+ * Creates a lazy string token. If [asciiOnly] is false, the byte array

+ * is passed through a UTF-8 decoder.

+ */

+ StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end,

+ bool asciiOnly, int charOffset)

+ : super(charOffset) {

+ int length = end - start;

+ if (length <= LAZY_THRESHOLD) {

+ valueOrLazySubstring = decodeUtf8(data, start, end, asciiOnly);

+ } else {

+ valueOrLazySubstring = new LazySubstring(data, start, length, asciiOnly);

+ }

+ String get value {

+ if (valueOrLazySubstring is String) {

+ return valueOrLazySubstring;

+ } else {

+ assert(valueOrLazySubstring is LazySubstring);

+ var data = valueOrLazySubstring.data;

+ int start = valueOrLazySubstring.start;

+ int end = start + valueOrLazySubstring.length;

+ if (data is String) {

+ valueOrLazySubstring = canonicalizedString(

+ data.substring(start, end), valueOrLazySubstring.boolValue);

+ } else {

+ valueOrLazySubstring =

+ decodeUtf8(data, start, end, valueOrLazySubstring.boolValue);

+ }

+ return valueOrLazySubstring;

+ }

+ /// See [Token.stringValue] for an explanation.

+ String get stringValue => null;

+ bool isIdentifier() => identical(kind, Tokens.IDENTIFIER_TOKEN);

+ String toString() => "StringToken($value)";

+ static final HashSet<String> canonicalizedSubstrings = new HashSet<String>();

+ static String canonicalizedString(String s, bool canonicalize) {

+ if (!canonicalize) return s;

+ var result = canonicalizedSubstrings.lookup(s);

+ if (result != null) return result;

+ canonicalizedSubstrings.add(s);

+ return s;

+ }

+ static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) {

+ var s;

+ if (asciiOnly) {

+ s = new String.fromCharCodes(data, start, end);

+ } else {

+ s = UTF8.decoder.convert(data, start, end);

+ }

+ return canonicalizedString(s, true);

+ }

+/**

+ * This class represents the necessary information to compute a substring

+ * lazily. The substring can either originate from a string or from

+ * a [:List<int>:] of UTF-8 bytes.

+ */

+abstract class LazySubstring {

+ /** The original data, either a string or a List<int> */

+ get data;

+ int get start;

+ int get length;

+ /**

+ * If this substring is based on a String, the [boolValue] indicates wheter

+ * the resulting substring should be canonicalized.

+ *

+ * For substrings based on a byte array, the [boolValue] is true if the

+ * array only holds ASCII characters. The resulting substring will be

+ * canonicalized after decoding.

+ */

+ bool get boolValue;

+ LazySubstring.internal();

+ factory LazySubstring(data, int start, int length, bool b) {

+ // See comment on [CompactLazySubstring].

+ if (start < 0x100000 && length < 0x200) {

+ int fields = (start << 9);

+ fields = fields | length;

+ fields = fields << 1;

+ if (b) fields |= 1;

+ return new CompactLazySubstring(data, fields);

+ } else {

+ return new FullLazySubstring(data, start, length, b);

+ }

+/**

+ * This class encodes [start], [length] and [boolValue] in a single

+ * 30 bit integer. It uses 20 bits for [start], which covers source files

+ * of 1MB. [length] has 9 bits, which covers 512 characters.

+ *

+ * The file html_dart2js.dart is currently around 1MB.

+ */

+class CompactLazySubstring extends LazySubstring {

+ final data;

+ final int fields;

+ CompactLazySubstring(this.data, this.fields) : super.internal();

+ int get start => fields >> 10;

+ int get length => (fields >> 1) & 0x1ff;

+ bool get boolValue => (fields & 1) == 1;

+class FullLazySubstring extends LazySubstring {

+ final data;

+ final int start;

+ final int length;

+ final bool boolValue;

+ FullLazySubstring(this.data, this.start, this.length, this.boolValue)

+ : super.internal();

+bool isUserDefinableOperator(String value) {

+ return isBinaryOperator(value) ||

+ isMinusOperator(value) ||

+ isTernaryOperator(value) ||

+ isUnaryOperator(value);

+bool isUnaryOperator(String value) => value == '~';

+bool isBinaryOperator(String value) {

+ return value == '==' ||

+ value == '[]' ||

+ value == '*' ||

+ value == '/' ||

+ value == '%' ||

+ value == '~/' ||

+ value == '+' ||

+ value == '<<' ||

+ value == '>>' ||

+ value == '>=' ||

+ value == '>' ||

+ value == '<=' ||

+ value == '<' ||

+ value == '&' ||

+ value == '^' ||

+ value == '|';

+bool isTernaryOperator(String value) => value == '[]=';

+bool isMinusOperator(String value) => value == '-';

« no previous file with comments | « pkg/dart_scanner/lib/src/precedence.dart ('k') | pkg/dart_scanner/lib/src/token_constants.dart » ('j') | no next file with comments »