Chromium Code Reviews| Index: sdk/lib/_internal/compiler/implementation/scanner/token.dart |
| diff --git a/sdk/lib/_internal/compiler/implementation/scanner/token.dart b/sdk/lib/_internal/compiler/implementation/scanner/token.dart |
| index a94a2d4410d952faf44216fb859e21b37993d466..d9909e3d79a16bdb529585466ce386aa7b12143d 100644 |
| --- a/sdk/lib/_internal/compiler/implementation/scanner/token.dart |
| +++ b/sdk/lib/_internal/compiler/implementation/scanner/token.dart |
| @@ -79,36 +79,54 @@ const int STRING_INTERPOLATION_IDENTIFIER_TOKEN = COMMENT_TOKEN + 1; |
| /** |
| * A token that doubles as a linked list. |
| */ |
| -class Token implements Spannable { |
| - /** |
| - * The precedence info for this token. [info] determines the kind and the |
| - * precedence level of this token. |
| - */ |
| - final PrecedenceInfo info; |
| - |
| +abstract class Token implements Spannable { |
| /** |
| * The character offset of the start of this token within the source text. |
| */ |
| final int charOffset; |
| + Token(this.charOffset); |
| + |
| /** |
| * The next token in the token stream. |
| */ |
| Token next; |
| - Token(this.info, this.charOffset); |
| + /** |
| + * The precedence info for this token. [info] determines the kind and the |
| + * precedence level of this token. |
| + * |
| + * Defined as getter to save a field in the [KeywordToken] subclass. |
| + */ |
| + PrecedenceInfo get info; |
| - get value => info.value; |
| + /** |
| + * The string represented by this token, a substring of the source code. |
| + * |
| + * For [StringToken]s the value includes the quotes, explicit escapes, etc. |
|
ngeoffray
2013/10/18 10:19:37
the value -> [value]
lukas
2013/10/24 16:48:36
Done.
|
| + * |
| + */ |
| + String get value; |
| /** |
| - * Returns the string value for keywords and symbols. For instance 'class' for |
| - * the [CLASS] keyword token and '*' for a [Token] based on [STAR_INFO]. For |
| - * other tokens, such identifiers, strings, numbers, etc, [stringValue] |
| - * returns [:null:]. |
| + * For symbol and keyword tokens, returns the string value reprenseted by this |
|
ngeoffray
2013/10/18 10:19:37
represented
lukas
2013/10/24 16:48:36
Done.
|
| + * token. For [StringToken]s this method returns [:null:]. |
| + * |
| + * For [SymbolToken]s and [KeywordToken]s, the string value is a compile-time |
| + * constant originating in the [PrecedenceInfo] or in the [Keyword] instance. |
| + * This allows testing for keywords and symbols using [:identical:], e.g., |
| + * [:identical('class', token.stringValue):]. |
| * |
| - * [stringValue] should only be used for testing keywords and symbols. |
| + * Note that returning [:null:] for string tokens is important to identify |
| + * symbols and keywords, we cannot use [value] instead. The string literal |
| + * "$a($b" |
| + * produces ..., SymbolToken($), StringToken(a), StringToken((), ... |
| + * |
| + * After parsing the identifier 'a', the parser tests for a function |
| + * declaration using [:identical(next.stringValue, '('):], which (rightfully) |
| + * returns false because stringValue returns [:null:]. |
| */ |
| - String get stringValue => info.value.stringValue; |
| + String get stringValue; |
| /** |
| * The kind enum of this token as determined by its [info]. |
| @@ -120,31 +138,32 @@ class Token implements Spannable { |
| */ |
| int get precedence => info.precedence; |
| - bool isIdentifier() => identical(kind, IDENTIFIER_TOKEN); |
| + /** |
| + * True if this token is an identifier. Some keywords are allowed as identifiers, |
| + * see implementaiton in [KeywordToken]. |
|
ngeoffray
2013/10/18 10:19:37
implementation
lukas
2013/10/24 16:48:36
Done.
|
| + */ |
| + bool isIdentifier(); |
| /** |
| * Returns a textual representation of this token to be used for debugging |
| * purposes. The resulting string might contain information about the |
| * structure of the token, for example 'StringToken(foo)' for the identifier |
| - * token 'foo'. Use [slowToString] for the text actually parsed by the token. |
| - */ |
| - String toString() => info.value.toString(); |
| - |
| - /** |
| - * The text parsed by this token. |
| + * token 'foo'. |
| + * |
| + * Use [value] for the text actually parsed by the token. |
| */ |
| - String slowToString() => toString(); |
| + String toString(); |
| /** |
| * The number of characters parsed by this token. |
| */ |
| - int get slowCharCount { |
| + int get charCount { |
| if (info == BAD_INPUT_INFO) { |
| // This is a token that wraps around an error message. Return 1 |
| // instead of the size of the length of the error message. |
| return 1; |
| } else { |
| - return slowToString().length; |
| + return value.length; |
| } |
| } |
| @@ -152,129 +171,230 @@ class Token implements Spannable { |
| } |
| /** |
| - * A keyword token. |
| + * A symbol token represents the symbol in its precedence info. |
|
ngeoffray
2013/10/18 10:19:37
symbol token -> [SymbolToken]
lukas
2013/10/24 16:48:36
Done.
|
| + * Also used for end of file with EOF_INFO. |
| */ |
| -class KeywordToken extends Token { |
| - final Keyword value; |
| - String get stringValue => value.syntax; |
| +class SymbolToken extends Token { |
| - KeywordToken(Keyword value, int charOffset) |
| - : this.value = value, super(value.info, charOffset); |
| + final PrecedenceInfo info; |
| + |
| + SymbolToken(this.info, int charOffset) : super(charOffset); |
| + |
| + String get value => info.value; |
| - bool isIdentifier() => value.isPseudo || value.isBuiltIn; |
| + String get stringValue => info.value; |
| - String toString() => value.syntax; |
| + bool isIdentifier() => false; |
| + |
| + String toString() => "SymbolToken($value)"; |
| } |
| /** |
| - * A String-valued token. |
| + * A [BeginGroupToken] reprsents a symbol that may be the beginning of |
|
ngeoffray
2013/10/18 10:19:37
represents
lukas
2013/10/24 16:48:36
Done.
|
| + * a pair of brackets, i.e., ( { [ < or ${ |
| + * The [endGroup] token points to the matching closing bracket in case |
| + * it can be identified during scanning. |
| */ |
| -class StringToken extends Token { |
| - final SourceString value; |
| - |
| - StringToken(PrecedenceInfo info, String value, int charOffset) |
| - : this.fromSource(info, new SourceString(value), charOffset); |
| +class BeginGroupToken extends SymbolToken { |
| + Token endGroup; |
| - StringToken.fromSource(PrecedenceInfo info, this.value, int charOffset) |
| + BeginGroupToken(PrecedenceInfo info, int charOffset) |
| : super(info, charOffset); |
|
ngeoffray
2013/10/18 10:19:37
Fits in one line?
lukas
2013/10/24 16:48:36
No :)
|
| - |
| - String toString() => "StringToken(${value.slowToString()})"; |
| - |
| - String slowToString() => value.slowToString(); |
| } |
| -abstract class SourceString extends IterableBase<int> { |
| - const factory SourceString(String string) = StringWrapper; |
| +/** |
| + * A keyword token. |
| + */ |
| +class KeywordToken extends Token { |
| + final Keyword keyword; |
| - static final Map<String, StringWrapper> canonicalizedValues = |
| - new Map<String, StringWrapper>(); |
| + KeywordToken(this.keyword, int charOffset) : super(charOffset); |
| - factory SourceString.fromSubstring(String string, int begin, int end) { |
| - var substring = string.substring(begin, end); |
| - return canonicalizedValues.putIfAbsent( |
| - substring, () => new StringWrapper(substring)); |
| - } |
| + PrecedenceInfo get info => keyword.info; |
| - void printOn(StringBuffer sb); |
| + String get value => keyword.syntax; |
| - /** Gives a [SourceString] that is not including the [initial] first and |
| - * [terminal] last characters. This is only intended to be used to remove |
| - * quotes from string literals (including an initial '@' for raw strings). |
| - */ |
| - SourceString copyWithoutQuotes(int initial, int terminal); |
| + String get stringValue => keyword.syntax; |
| - String get stringValue; |
| + bool isIdentifier() => keyword.isPseudo || keyword.isBuiltIn; |
| - String slowToString(); |
| - |
| - bool get isEmpty; |
| - |
| - bool isPrivate(); |
| + String toString() => "KeywordToken($value)"; |
| } |
| -class StringWrapper extends IterableBase<int> implements SourceString { |
| - final String stringValue; |
| +/** |
| + * A String-valued token. Represents identifiers, string literals, |
| + * number literals, comments and error tokens, using the corresponding |
|
ngeoffray
2013/10/18 10:19:37
comments, and ...
lukas
2013/10/24 16:48:36
Done.
|
| + * precedence info. |
| + */ |
| +class StringToken extends Token { |
| + /** |
| + * The length threshold above which substring tokens are computed lazily. |
| + * |
| + * For string tokens that are substrings of the program source, the actual |
| + * substring extraction is performed lazily. This is beneficial because |
| + * not all scanned code is actually used. For unused parts, the substrings |
| + * are never computed and allocated. |
| + */ |
| + static const int LAZY_THRESHOLD = 4; |
|
sra1
2013/10/22 19:52:31
How did you calculate this threshold?
lukas
2013/10/23 07:11:01
Short strings have a smaller footprint than a Comp
|
| - const StringWrapper(this.stringValue); |
| + var valueOrLazySubstring; |
|
ngeoffray
2013/10/18 10:19:37
You could put the union type of this field in comm
lukas
2013/10/24 16:48:36
Done.
|
| - int get hashCode => stringValue.hashCode; |
| + final PrecedenceInfo info; |
| + |
| + /** |
| + * Creates a non-lazy string token. If [canonicalize] is true, the string |
| + * is canonicalized before the token is created. |
| + */ |
| + StringToken.fromString(this.info, String value, int charOffset, |
| + [bool canonicalize = false]) |
|
ngeoffray
2013/10/18 10:19:37
Make it a named parameter? Easier when reading cal
lukas
2013/10/24 16:48:36
Done.
|
| + : valueOrLazySubstring = canonicalizedString(value, canonicalize), |
| + super(charOffset); |
| - bool operator ==(other) { |
| - return other is SourceString && toString() == other.slowToString(); |
| + /** |
| + * Creates a lazy string token. If [canonicalize] is true, the string |
| + * is canonicalized before the token is created. |
| + */ |
| + StringToken.fromSubstring(this.info, String data, int start, int end, |
| + int charOffset, [bool canonicalize = false]) |
|
ngeoffray
2013/10/18 10:19:37
ditto
lukas
2013/10/24 16:48:36
Done.
|
| + : super(charOffset) { |
| + int length = end - start; |
| + if (length <= LAZY_THRESHOLD) { |
| + valueOrLazySubstring = canonicalizedString(data.substring(start, end), |
| + canonicalize); |
|
ngeoffray
2013/10/18 10:19:37
indentation.
lukas
2013/10/24 16:48:36
Done.
|
| + } else { |
| + valueOrLazySubstring = |
| + new LazySubstring(data, start, length, canonicalize); |
| + } |
| } |
| - Iterator<int> get iterator => new StringCodeIterator(stringValue); |
| + /** |
| + * Creates a lazy string token. If [asciiOnly] is false, the byte array |
| + * is passed through a UTF-8 decoder. |
| + */ |
| + StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end, |
| + bool asciiOnly, int charOffset) |
| + : super(charOffset) { |
| + int length = end - start; |
| + if (length <= LAZY_THRESHOLD) { |
| + valueOrLazySubstring = decodeUtf8(data, start, end, asciiOnly); |
| + } else { |
| + valueOrLazySubstring = new LazySubstring(data, start, length, asciiOnly); |
| + } |
| + } |
| - void printOn(StringBuffer sb) { |
| - sb.write(stringValue); |
| + String get value { |
| + if (valueOrLazySubstring is String) { |
| + return valueOrLazySubstring; |
| + } else { |
| + assert(valueOrLazySubstring is LazySubstring); |
| + var data = valueOrLazySubstring.data; |
| + int start = valueOrLazySubstring.start; |
| + int end = start + valueOrLazySubstring.length; |
| + if (data is String) { |
| + valueOrLazySubstring = canonicalizedString( |
| + data.substring(start, end), valueOrLazySubstring.boolValue); |
| + } else { |
| + valueOrLazySubstring = decodeUtf8( |
| + data, start, end, valueOrLazySubstring.boolValue); |
| + } |
| + return valueOrLazySubstring; |
| + } |
| } |
| - String toString() => stringValue; |
| + String get stringValue => null; |
| - String slowToString() => stringValue; |
| + bool isIdentifier() => identical(kind, IDENTIFIER_TOKEN); |
| - SourceString copyWithoutQuotes(int initial, int terminal) { |
| - assert(0 <= initial); |
| - assert(0 <= terminal); |
| - assert(initial + terminal <= stringValue.length); |
| - return new StringWrapper( |
| - stringValue.substring(initial, stringValue.length - terminal)); |
| - } |
| + String toString() => "StringToken($value)"; |
| - bool get isEmpty => stringValue.isEmpty; |
| + static final HashSet<String> canonicalizedSubstrings = |
| + new HashSet(); |
|
ngeoffray
2013/10/18 10:19:37
HashSet<String>()
lukas
2013/10/24 16:48:36
Done.
|
| - bool isPrivate() => !isEmpty && stringValue.codeUnitAt(0) == $_; |
| + static String canonicalizedString(String s, bool canonicalize) { |
| + if (!canonicalize) return s; |
| + var result = canonicalizedSubstrings.lookup(s); |
| + if (result != null) return result; |
| + canonicalizedSubstrings.add(s); |
| + return s; |
| + } |
| + |
| + static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) { |
| + var s; |
| + if (asciiOnly) { |
| + // getRange returns an iterator, it does not copy the data. |
| + s = new String.fromCharCodes(data.getRange(start, end)); |
| + } else { |
| + // TODO(lry): this is measurably slow. Also sublist is copied eagerly. |
| + var bytes = data.sublist(start, end); |
| + s = UTF8.decode(bytes); |
| + } |
| + return canonicalizedString(s, true); |
| + } |
| } |
| -class StringCodeIterator implements Iterator<int> { |
| - final String string; |
| - int index; |
| - final int end; |
| - int _current; |
| +/** |
| + * This class represents the necessary information to compute a substring |
| + * lazily. The substring can either originate in a string or in a [:List<int>:] |
|
ngeoffray
2013/10/18 10:19:37
originate in -> originate from?
lukas
2013/10/24 16:48:36
Done.
|
| + * of UTF-8 bytes. |
| + */ |
| +abstract class LazySubstring { |
| + /** The original data, either a string or a List<int> */ |
| + get data; |
| - StringCodeIterator(String string) : |
| - this.string = string, index = 0, end = string.length; |
| + int get start; |
| + int get length; |
| - StringCodeIterator.substring(this.string, this.index, this.end) { |
| - assert(0 <= index); |
| - assert(index <= end); |
| - assert(end <= string.length); |
| + /** |
| + * If this substring is based on a String, the boolean indicates whether the |
|
ngeoffray
2013/10/18 10:19:37
the boolean -> [boolValue]
lukas
2013/10/24 16:48:36
Done.
|
| + * resulting substring should be canonicalized. |
| + * |
| + * For substrings based on a byte array, the boolean value is true if the |
|
ngeoffray
2013/10/18 10:19:37
ditto
lukas
2013/10/24 16:48:36
Done.
|
| + * array only holds ASCII characters. The resulting substring will be |
| + * canonicalized after decoding. |
| + */ |
| + bool get boolValue; |
| + |
| + LazySubstring.internal(); |
| + |
| + factory LazySubstring(data, int start, int length, bool b) { |
| + // See comment on [CompactLazySubstring]. |
| + if (start < 0x100000 && length < 0x200) { |
| + int fields = (start << 9); |
| + fields = fields | length; |
| + fields = fields << 1; |
| + if (b) fields |= 1; |
| + return new CompactLazySubstring(data, fields); |
| + } else { |
| + return new FullLazySubstring(data, start, length, b); |
| + } |
| } |
| +} |
| - int get current => _current; |
| +/** |
| + * This class encodes [start], [length] and [boolValue] in a single |
| + * 30 bit integer. It uses 20 bits for [start], which covers source files |
| + * of 1M. [length] has 9 bits, which covers 512 characters. |
|
ngeoffray
2013/10/18 10:19:37
1M -> 1MB.
lukas
2013/10/24 16:48:36
Done.
|
| + * |
| + * The file html_dart2js.dart is currently around 1M. |
|
ngeoffray
2013/10/18 10:19:37
1M -> 1MB
lukas
2013/10/24 16:48:36
Done.
|
| + */ |
| +class CompactLazySubstring extends LazySubstring { |
| + final data; |
| + final int fields; |
| - bool moveNext() { |
| - _current = null; |
| - if (index >= end) return false; |
| - _current = string.codeUnitAt(index++); |
| - return true; |
| - } |
| + CompactLazySubstring(this.data, this.fields) : super.internal(); |
| + |
| + int get start => fields >> 10; |
| + int get length => (fields >> 1) & 0x1ff; |
| + bool get boolValue => (fields & 1) == 1; |
| } |
| -class BeginGroupToken extends StringToken { |
| - Token endGroup; |
| - BeginGroupToken(PrecedenceInfo info, String value, int charOffset) |
| - : super(info, value, charOffset); |
| +class FullLazySubstring extends LazySubstring { |
| + final data; |
| + final int start; |
| + final int length; |
| + final bool boolValue; |
| + FullLazySubstring(this.data, this.start, this.length, this.boolValue) |
| + : super.internal(); |
| } |
| bool isUserDefinableOperator(String value) { |