| Index: sdk/lib/_internal/compiler/implementation/scanner/token.dart
|
| diff --git a/sdk/lib/_internal/compiler/implementation/scanner/token.dart b/sdk/lib/_internal/compiler/implementation/scanner/token.dart
|
| index a94a2d4410d952faf44216fb859e21b37993d466..6709f53f02643d38d01609fe4982c88ef674c72f 100644
|
| --- a/sdk/lib/_internal/compiler/implementation/scanner/token.dart
|
| +++ b/sdk/lib/_internal/compiler/implementation/scanner/token.dart
|
| @@ -79,36 +79,54 @@ const int STRING_INTERPOLATION_IDENTIFIER_TOKEN = COMMENT_TOKEN + 1;
|
| /**
|
| * A token that doubles as a linked list.
|
| */
|
| -class Token implements Spannable {
|
| - /**
|
| - * The precedence info for this token. [info] determines the kind and the
|
| - * precedence level of this token.
|
| - */
|
| - final PrecedenceInfo info;
|
| -
|
| +abstract class Token implements Spannable {
|
| /**
|
| * The character offset of the start of this token within the source text.
|
| */
|
| final int charOffset;
|
|
|
| + Token(this.charOffset);
|
| +
|
| /**
|
| * The next token in the token stream.
|
| */
|
| Token next;
|
|
|
| - Token(this.info, this.charOffset);
|
| + /**
|
| + * The precedence info for this token. [info] determines the kind and the
|
| + * precedence level of this token.
|
| + *
|
| + * Defined as getter to save a field in the [KeywordToken] subclass.
|
| + */
|
| + PrecedenceInfo get info;
|
|
|
| - get value => info.value;
|
| + /**
|
| + * The string represented by this token, a substring of the source code.
|
| + *
|
| + * For [StringToken]s the value includes the quotes, explicit escapes, etc.
|
| + *
|
| + */
|
| + String get value;
|
|
|
| /**
|
| - * Returns the string value for keywords and symbols. For instance 'class' for
|
| - * the [CLASS] keyword token and '*' for a [Token] based on [STAR_INFO]. For
|
| - * other tokens, such identifiers, strings, numbers, etc, [stringValue]
|
| - * returns [:null:].
|
| + * For symbol and keyword tokens, returns the string value represented by this
|
| + * token. For [StringToken]s this method returns [:null:].
|
| + *
|
| + * For [SymbolToken]s and [KeywordToken]s, the string value is a compile-time
|
| + * constant originating in the [PrecedenceInfo] or in the [Keyword] instance.
|
| + * This allows testing for keywords and symbols using [:identical:], e.g.,
|
| + * [:identical('class', token.stringValue):].
|
| *
|
| - * [stringValue] should only be used for testing keywords and symbols.
|
| + * Note that returning [:null:] for string tokens is important to identify
|
| + * symbols and keywords, we cannot use [value] instead. The string literal
|
| + * "$a($b"
|
| + * produces ..., SymbolToken($), StringToken(a), StringToken((), ...
|
| + *
|
| + * After parsing the identifier 'a', the parser tests for a function
|
| + * declaration using [:identical(next.stringValue, '('):], which (rightfully)
|
| + * returns false because stringValue returns [:null:].
|
| */
|
| - String get stringValue => info.value.stringValue;
|
| + String get stringValue;
|
|
|
| /**
|
| * The kind enum of this token as determined by its [info].
|
| @@ -120,31 +138,32 @@ class Token implements Spannable {
|
| */
|
| int get precedence => info.precedence;
|
|
|
| - bool isIdentifier() => identical(kind, IDENTIFIER_TOKEN);
|
| + /**
|
| + * True if this token is an identifier. Some keywords allowed as identifiers,
|
| + * see implementation in [KeywordToken].
|
| + */
|
| + bool isIdentifier();
|
|
|
| /**
|
| * Returns a textual representation of this token to be used for debugging
|
| * purposes. The resulting string might contain information about the
|
| * structure of the token, for example 'StringToken(foo)' for the identifier
|
| - * token 'foo'. Use [slowToString] for the text actually parsed by the token.
|
| - */
|
| - String toString() => info.value.toString();
|
| -
|
| - /**
|
| - * The text parsed by this token.
|
| + * token 'foo'.
|
| + *
|
| + * Use [value] for the text actually parsed by the token.
|
| */
|
| - String slowToString() => toString();
|
| + String toString();
|
|
|
| /**
|
| * The number of characters parsed by this token.
|
| */
|
| - int get slowCharCount {
|
| + int get charCount {
|
| if (info == BAD_INPUT_INFO) {
|
| // This is a token that wraps around an error message. Return 1
|
| // instead of the size of the length of the error message.
|
| return 1;
|
| } else {
|
| - return slowToString().length;
|
| + return value.length;
|
| }
|
| }
|
|
|
| @@ -152,129 +171,237 @@ class Token implements Spannable {
|
| }
|
|
|
| /**
|
| - * A keyword token.
|
| + * A symbol token represents the symbol in its precedence info.
|
| + * Also used for end of file with EOF_INFO.
|
| */
|
| -class KeywordToken extends Token {
|
| - final Keyword value;
|
| - String get stringValue => value.syntax;
|
| +class SymbolToken extends Token {
|
|
|
| - KeywordToken(Keyword value, int charOffset)
|
| - : this.value = value, super(value.info, charOffset);
|
| + final PrecedenceInfo info;
|
| +
|
| + SymbolToken(this.info, int charOffset) : super(charOffset);
|
| +
|
| + String get value => info.value;
|
|
|
| - bool isIdentifier() => value.isPseudo || value.isBuiltIn;
|
| + String get stringValue => info.value;
|
|
|
| - String toString() => value.syntax;
|
| + bool isIdentifier() => false;
|
| +
|
| + String toString() => "SymbolToken($value)";
|
| }
|
|
|
| /**
|
| - * A String-valued token.
|
| + * A [BeginGroupToken] represents a symbol that may be the beginning of
|
| + * a pair of brackets, i.e., ( { [ < or ${
|
| + * The [endGroup] token points to the matching closing bracket in case
|
| + * it can be identified during scanning.
|
| */
|
| -class StringToken extends Token {
|
| - final SourceString value;
|
| -
|
| - StringToken(PrecedenceInfo info, String value, int charOffset)
|
| - : this.fromSource(info, new SourceString(value), charOffset);
|
| +class BeginGroupToken extends SymbolToken {
|
| + Token endGroup;
|
|
|
| - StringToken.fromSource(PrecedenceInfo info, this.value, int charOffset)
|
| + BeginGroupToken(PrecedenceInfo info, int charOffset)
|
| : super(info, charOffset);
|
| -
|
| - String toString() => "StringToken(${value.slowToString()})";
|
| -
|
| - String slowToString() => value.slowToString();
|
| }
|
|
|
| -abstract class SourceString extends IterableBase<int> {
|
| - const factory SourceString(String string) = StringWrapper;
|
| +/**
|
| + * A keyword token.
|
| + */
|
| +class KeywordToken extends Token {
|
| + final Keyword keyword;
|
|
|
| - static final Map<String, StringWrapper> canonicalizedValues =
|
| - new Map<String, StringWrapper>();
|
| + KeywordToken(this.keyword, int charOffset) : super(charOffset);
|
|
|
| - factory SourceString.fromSubstring(String string, int begin, int end) {
|
| - var substring = string.substring(begin, end);
|
| - return canonicalizedValues.putIfAbsent(
|
| - substring, () => new StringWrapper(substring));
|
| - }
|
| + PrecedenceInfo get info => keyword.info;
|
|
|
| - void printOn(StringBuffer sb);
|
| + String get value => keyword.syntax;
|
|
|
| - /** Gives a [SourceString] that is not including the [initial] first and
|
| - * [terminal] last characters. This is only intended to be used to remove
|
| - * quotes from string literals (including an initial '@' for raw strings).
|
| - */
|
| - SourceString copyWithoutQuotes(int initial, int terminal);
|
| + String get stringValue => keyword.syntax;
|
|
|
| - String get stringValue;
|
| + bool isIdentifier() => keyword.isPseudo || keyword.isBuiltIn;
|
|
|
| - String slowToString();
|
| -
|
| - bool get isEmpty;
|
| -
|
| - bool isPrivate();
|
| + String toString() => "KeywordToken($value)";
|
| }
|
|
|
| -class StringWrapper extends IterableBase<int> implements SourceString {
|
| - final String stringValue;
|
| +/**
|
| + * A String-valued token. Represents identifiers, string literals,
|
| + * number literals, comments and error tokens, using the corresponding
|
| + * precedence info.
|
| + */
|
| +class StringToken extends Token {
|
| + /**
|
| + * The length threshold above which substring tokens are computed lazily.
|
| + *
|
| + * For string tokens that are substrings of the program source, the actual
|
| + * substring extraction is performed lazily. This is beneficial because
|
| + * not all scanned code is actually used. For unused parts, the substrings
|
| + * are never computed and allocated.
|
| + */
|
| + static final int lazyThreshold = 4;
|
|
|
| - const StringWrapper(this.stringValue);
|
| + var valueOrSublist;
|
|
|
| - int get hashCode => stringValue.hashCode;
|
| + final PrecedenceInfo info;
|
| +
|
| + /**
|
| + * Creates a non-lazy string token. If [canonicalize] is true, the string
|
| + * is canonicalized before the token is created.
|
| + */
|
| + StringToken.fromString(this.info, String value, int charOffset,
|
| + [bool canonicalize = false])
|
| + : valueOrSublist = canonicalizedString(value, canonicalize),
|
| + super(charOffset);
|
|
|
| - bool operator ==(other) {
|
| - return other is SourceString && toString() == other.slowToString();
|
| + /**
|
| + * Creates a lazy string token. If [canonicalize] is true, the string
|
| + * is canonicalized when the substring is extracted.
|
| + */
|
| + StringToken.fromSubstring(this.info, String data, int start, int end,
|
| + int charOffset, [bool canonicalize = false])
|
| + : super(charOffset) {
|
| + int length = end - start;
|
| + if (length <= lazyThreshold) {
|
| + valueOrSublist = canonicalizedString(data.substring(start, end),
|
| + canonicalize);
|
| + } else {
|
| + valueOrSublist = new LazySubstring(data, start, length, canonicalize);
|
| + }
|
| }
|
|
|
| - Iterator<int> get iterator => new StringCodeIterator(stringValue);
|
| + /**
|
| + * Creates a lazy string token. If [asciiOnly] is false, the byte array
|
| + * is passed through a UTF-8 decoder.
|
| + */
|
| + StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end,
|
| + bool asciiOnly, int charOffset)
|
| + : super(charOffset) {
|
| + int length = end - start;
|
| + if (length <= lazyThreshold) {
|
| + valueOrSublist = decodeUtf8(data, start, end, asciiOnly);
|
| + } else {
|
| + valueOrSublist = new LazySubstring(data, start, length, asciiOnly);
|
| + }
|
| + }
|
|
|
| - void printOn(StringBuffer sb) {
|
| - sb.write(stringValue);
|
| + String get value {
|
| + if (valueOrSublist is String) {
|
| + return valueOrSublist;
|
| + } else {
|
| + if (valueOrSublist is CompactLazySubstring ||
|
| + valueOrSublist is FullLazySubstring) {
|
| + var data = valueOrSublist.data;
|
| + int start = valueOrSublist.start;
|
| + int end = start + valueOrSublist.length;
|
| + if (data is String) {
|
| + valueOrSublist = canonicalizedString(data.substring(start, end),
|
| + valueOrSublist.boolValue);
|
| + } else {
|
| + valueOrSublist = decodeUtf8(data, start, end,
|
| + valueOrSublist.boolValue);
|
| + }
|
| + }
|
| + return valueOrSublist;
|
| + }
|
| }
|
|
|
| - String toString() => stringValue;
|
| + String get stringValue => null;
|
|
|
| - String slowToString() => stringValue;
|
| + bool isIdentifier() => identical(kind, IDENTIFIER_TOKEN);
|
|
|
| - SourceString copyWithoutQuotes(int initial, int terminal) {
|
| - assert(0 <= initial);
|
| - assert(0 <= terminal);
|
| - assert(initial + terminal <= stringValue.length);
|
| - return new StringWrapper(
|
| - stringValue.substring(initial, stringValue.length - terminal));
|
| + String toString() => "StringToken($value)";
|
| +
|
| + // @lry replace by hash set after merging from svn trunk
|
| + static final HashSet<String> canonicalizedSubstrings =
|
| + new HashSet();
|
| +
|
| + static String canonicalizedString(String s, bool canonicalize) {
|
| + if (canonicalize) {
|
| + var result = canonicalizedSubstrings.lookup(s);
|
| + if (result == null) {
|
| + canonicalizedSubstrings.add(s);
|
| + return s;
|
| + } else {
|
| + return result;
|
| + }
|
| + } else {
|
| + return s;
|
| + }
|
| }
|
|
|
| - bool get isEmpty => stringValue.isEmpty;
|
| -
|
| - bool isPrivate() => !isEmpty && stringValue.codeUnitAt(0) == $_;
|
| + static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) {
|
| + var s;
|
| + if (asciiOnly) {
|
| + s = new String.fromCharCodes(data.getRange(start, end));
|
| + } else {
|
| + // TODO(lry), this is measurably slow. Also sublist is allocated eagerly.
|
| + var bytes = data.sublist(start, end);
|
| + s = UTF8.decode(bytes);
|
| + }
|
| + return canonicalizedString(s, true);
|
| + }
|
| }
|
|
|
| -class StringCodeIterator implements Iterator<int> {
|
| - final String string;
|
| - int index;
|
| - final int end;
|
| - int _current;
|
| +/**
|
| + * This class represents the necessary information to compute a substring
|
| + * lazily. The substring can either originate in a string or in a [:List<int>:]
|
| + * of UTF-8 bytes.
|
| + */
|
| +abstract class LazySubstring {
|
| + /** The original data, either a string or a List<int> */
|
| + get data;
|
|
|
| - StringCodeIterator(String string) :
|
| - this.string = string, index = 0, end = string.length;
|
| + int get start;
|
| + int get length;
|
|
|
| - StringCodeIterator.substring(this.string, this.index, this.end) {
|
| - assert(0 <= index);
|
| - assert(index <= end);
|
| - assert(end <= string.length);
|
| + /**
|
| + * If this substring is based on a String, the boolean indicates whether the
|
| + * resulting substring should be canonicalized.
|
| + *
|
| + * For substrings based on a byte array, the boolean value is true if the
|
| + * array only holds ASCII characters. The resulting substring will be
|
| + * canonicalized after decoding.
|
| + */
|
| + bool get boolValue;
|
| +
|
| + LazySubstring.internal();
|
| +
|
| + factory LazySubstring(data, int start, int length, bool b) {
|
| + // See comment on [CompactLazySubstring].
|
| + if (start < 0x100000 && length < 0x200) {
|
| + int fields = (start << 9);
|
| + fields = fields | length;
|
| + fields = fields << 1;
|
| + if (b) fields |= 1;
|
| + return new CompactLazySubstring(data, fields);
|
| + } else {
|
| + return new FullLazySubstring(data, start, length, b);
|
| + }
|
| }
|
| +}
|
|
|
| - int get current => _current;
|
| +/**
|
| + * This class encodes [start], [length] and [boolValue] in a single
|
| + * 30 bit integer. It uses 20 bits for [start], which covers source files
|
| + * of 1M. [length] has 9 bits, which covers 512 characters.
|
| + *
|
| + * The file html_dart2js.dart is currently around 1M.
|
| + */
|
| +class CompactLazySubstring extends LazySubstring {
|
| + final data;
|
| + final int fields;
|
|
|
| - bool moveNext() {
|
| - _current = null;
|
| - if (index >= end) return false;
|
| - _current = string.codeUnitAt(index++);
|
| - return true;
|
| - }
|
| + CompactLazySubstring(this.data, this.fields) : super.internal();
|
| +
|
| + int get start => fields >> 10;
|
| + int get length => (fields >> 1) & 0x1ff;
|
| + bool get boolValue => (fields & 1) == 1;
|
| }
|
|
|
| -class BeginGroupToken extends StringToken {
|
| - Token endGroup;
|
| - BeginGroupToken(PrecedenceInfo info, String value, int charOffset)
|
| - : super(info, value, charOffset);
|
| +class FullLazySubstring extends LazySubstring {
|
| + final data;
|
| + final int start;
|
| + final int length;
|
| + final bool boolValue;
|
| + FullLazySubstring(this.data, this.start, this.length, this.boolValue)
|
| + : super.internal();
|
| }
|
|
|
| bool isUserDefinableOperator(String value) {
|
|
|