Index: sdk/lib/_internal/compiler/implementation/scanner/token.dart |
diff --git a/sdk/lib/_internal/compiler/implementation/scanner/token.dart b/sdk/lib/_internal/compiler/implementation/scanner/token.dart |
index a94a2d4410d952faf44216fb859e21b37993d466..d9909e3d79a16bdb529585466ce386aa7b12143d 100644 |
--- a/sdk/lib/_internal/compiler/implementation/scanner/token.dart |
+++ b/sdk/lib/_internal/compiler/implementation/scanner/token.dart |
@@ -79,36 +79,54 @@ const int STRING_INTERPOLATION_IDENTIFIER_TOKEN = COMMENT_TOKEN + 1; |
/** |
* A token that doubles as a linked list. |
*/ |
-class Token implements Spannable { |
- /** |
- * The precedence info for this token. [info] determines the kind and the |
- * precedence level of this token. |
- */ |
- final PrecedenceInfo info; |
- |
+abstract class Token implements Spannable { |
/** |
* The character offset of the start of this token within the source text. |
*/ |
final int charOffset; |
+ Token(this.charOffset); |
+ |
/** |
* The next token in the token stream. |
*/ |
Token next; |
- Token(this.info, this.charOffset); |
+ /** |
+ * The precedence info for this token. [info] determines the kind and the |
+ * precedence level of this token. |
+ * |
+ * Defined as getter to save a field in the [KeywordToken] subclass. |
+ */ |
+ PrecedenceInfo get info; |
- get value => info.value; |
+ /** |
+ * The string represented by this token, a substring of the source code. |
+ * |
+ * For [StringToken]s the value includes the quotes, explicit escapes, etc. |
ngeoffray
2013/10/18 10:19:37
the value -> [value]
lukas
2013/10/24 16:48:36
Done.
|
+ * |
+ */ |
+ String get value; |
/** |
- * Returns the string value for keywords and symbols. For instance 'class' for |
- * the [CLASS] keyword token and '*' for a [Token] based on [STAR_INFO]. For |
- * other tokens, such identifiers, strings, numbers, etc, [stringValue] |
- * returns [:null:]. |
+ * For symbol and keyword tokens, returns the string value represented by this
ngeoffray
2013/10/18 10:19:37
represented
lukas
2013/10/24 16:48:36
Done.
|
+ * token. For [StringToken]s this method returns [:null:]. |
+ * |
+ * For [SymbolToken]s and [KeywordToken]s, the string value is a compile-time |
+ * constant originating in the [PrecedenceInfo] or in the [Keyword] instance. |
+ * This allows testing for keywords and symbols using [:identical:], e.g., |
+ * [:identical('class', token.value):]. |
* |
- * [stringValue] should only be used for testing keywords and symbols. |
+ * Note that returning [:null:] for string tokens is important to identify |
+ * symbols and keywords; we cannot use [value] instead. The string literal |
+ * "$a($b" |
+ * produces ..., SymbolToken($), StringToken(a), StringToken((), ... |
+ * |
+ * After parsing the identifier 'a', the parser tests for a function |
+ * declaration using [:identical(next.stringValue, '('):], which (rightfully) |
+ * returns false because stringValue returns [:null:]. |
*/ |
- String get stringValue => info.value.stringValue; |
+ String get stringValue; |
/** |
* The kind enum of this token as determined by its [info]. |
@@ -120,31 +138,32 @@ class Token implements Spannable { |
*/ |
int get precedence => info.precedence; |
- bool isIdentifier() => identical(kind, IDENTIFIER_TOKEN); |
+ /** |
+ * True if this token is an identifier. Some keywords are allowed as |
+ * identifiers; see the implementation in [KeywordToken].
ngeoffray
2013/10/18 10:19:37
implementation
lukas
2013/10/24 16:48:36
Done.
|
+ */ |
+ bool isIdentifier(); |
/** |
* Returns a textual representation of this token to be used for debugging |
* purposes. The resulting string might contain information about the |
* structure of the token, for example 'StringToken(foo)' for the identifier |
- * token 'foo'. Use [slowToString] for the text actually parsed by the token. |
- */ |
- String toString() => info.value.toString(); |
- |
- /** |
- * The text parsed by this token. |
+ * token 'foo'. |
+ * |
+ * Use [value] for the text actually parsed by the token. |
*/ |
- String slowToString() => toString(); |
+ String toString(); |
/** |
* The number of characters parsed by this token. |
*/ |
- int get slowCharCount { |
+ int get charCount { |
if (info == BAD_INPUT_INFO) { |
// This is a token that wraps around an error message. Return 1 |
// instead of the size of the length of the error message. |
return 1; |
} else { |
- return slowToString().length; |
+ return value.length; |
} |
} |
@@ -152,129 +171,230 @@ class Token implements Spannable { |
} |
/** |
- * A keyword token. |
+ * A symbol token represents the symbol in its precedence info.
ngeoffray
2013/10/18 10:19:37
symbol token -> [SymbolToken]
lukas
2013/10/24 16:48:36
Done.
|
+ * Also used for end of file with EOF_INFO. |
*/ |
-class KeywordToken extends Token { |
- final Keyword value; |
- String get stringValue => value.syntax; |
+class SymbolToken extends Token { |
- KeywordToken(Keyword value, int charOffset) |
- : this.value = value, super(value.info, charOffset); |
+ final PrecedenceInfo info; |
+ |
+ SymbolToken(this.info, int charOffset) : super(charOffset); |
+ |
+ String get value => info.value; |
- bool isIdentifier() => value.isPseudo || value.isBuiltIn; |
+ String get stringValue => info.value; |
- String toString() => value.syntax; |
+ bool isIdentifier() => false; |
+ |
+ String toString() => "SymbolToken($value)"; |
} |
/** |
- * A String-valued token. |
+ * A [BeginGroupToken] represents a symbol that may be the beginning of
ngeoffray
2013/10/18 10:19:37
represents
lukas
2013/10/24 16:48:36
Done.
|
+ * a pair of brackets, i.e., ( { [ < or ${ |
+ * The [endGroup] token points to the matching closing bracket in case |
+ * it can be identified during scanning. |
*/ |
-class StringToken extends Token { |
- final SourceString value; |
- |
- StringToken(PrecedenceInfo info, String value, int charOffset) |
- : this.fromSource(info, new SourceString(value), charOffset); |
+class BeginGroupToken extends SymbolToken { |
+ Token endGroup; |
- StringToken.fromSource(PrecedenceInfo info, this.value, int charOffset) |
+ BeginGroupToken(PrecedenceInfo info, int charOffset) |
: super(info, charOffset); |
ngeoffray
2013/10/18 10:19:37
Fits in one line?
lukas
2013/10/24 16:48:36
No :)
|
- |
- String toString() => "StringToken(${value.slowToString()})"; |
- |
- String slowToString() => value.slowToString(); |
} |
-abstract class SourceString extends IterableBase<int> { |
- const factory SourceString(String string) = StringWrapper; |
+/** |
+ * A keyword token. |
+ */ |
+class KeywordToken extends Token { |
+ final Keyword keyword; |
- static final Map<String, StringWrapper> canonicalizedValues = |
- new Map<String, StringWrapper>(); |
+ KeywordToken(this.keyword, int charOffset) : super(charOffset); |
- factory SourceString.fromSubstring(String string, int begin, int end) { |
- var substring = string.substring(begin, end); |
- return canonicalizedValues.putIfAbsent( |
- substring, () => new StringWrapper(substring)); |
- } |
+ PrecedenceInfo get info => keyword.info; |
- void printOn(StringBuffer sb); |
+ String get value => keyword.syntax; |
- /** Gives a [SourceString] that is not including the [initial] first and |
- * [terminal] last characters. This is only intended to be used to remove |
- * quotes from string literals (including an initial '@' for raw strings). |
- */ |
- SourceString copyWithoutQuotes(int initial, int terminal); |
+ String get stringValue => keyword.syntax; |
- String get stringValue; |
+ bool isIdentifier() => keyword.isPseudo || keyword.isBuiltIn; |
- String slowToString(); |
- |
- bool get isEmpty; |
- |
- bool isPrivate(); |
+ String toString() => "KeywordToken($value)"; |
} |
-class StringWrapper extends IterableBase<int> implements SourceString { |
- final String stringValue; |
+/** |
+ * A String-valued token. Represents identifiers, string literals, |
+ * number literals, comments and error tokens, using the corresponding |
ngeoffray
2013/10/18 10:19:37
comments, and ...
lukas
2013/10/24 16:48:36
Done.
|
+ * precedence info. |
+ */ |
+class StringToken extends Token { |
+ /** |
+ * The length threshold above which substring tokens are computed lazily. |
+ * |
+ * For string tokens that are substrings of the program source, the actual |
+ * substring extraction is performed lazily. This is beneficial because |
+ * not all scanned code is actually used. For unused parts, the substrings |
+ * are never computed and allocated. |
+ */ |
+ static const int LAZY_THRESHOLD = 4; |
sra1
2013/10/22 19:52:31
How did you calculate this threshold?
lukas
2013/10/23 07:11:01
Short strings have a smaller footprint than a Comp
|
- const StringWrapper(this.stringValue); |
+ var valueOrLazySubstring; |
ngeoffray
2013/10/18 10:19:37
You could put the union type of this field in comm
lukas
2013/10/24 16:48:36
Done.
|
- int get hashCode => stringValue.hashCode; |
+ final PrecedenceInfo info; |
+ |
+ /** |
+ * Creates a non-lazy string token. If [canonicalize] is true, the string |
+ * is canonicalized before the token is created. |
+ */ |
+ StringToken.fromString(this.info, String value, int charOffset, |
+ [bool canonicalize = false]) |
ngeoffray
2013/10/18 10:19:37
Make it a named parameter? Easier when reading cal
lukas
2013/10/24 16:48:36
Done.
|
+ : valueOrLazySubstring = canonicalizedString(value, canonicalize), |
+ super(charOffset); |
- bool operator ==(other) { |
- return other is SourceString && toString() == other.slowToString(); |
+ /** |
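+ * Creates a lazy string token. If [canonicalize] is true, the string is |
+ * canonicalized when the substring is extracted: immediately for substrings |
+ * of at most [LAZY_THRESHOLD] characters, otherwise on first access of [value]. |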
+ */ |
+ StringToken.fromSubstring(this.info, String data, int start, int end, |
+ int charOffset, [bool canonicalize = false]) |
ngeoffray
2013/10/18 10:19:37
ditto
lukas
2013/10/24 16:48:36
Done.
|
+ : super(charOffset) { |
+ int length = end - start; |
+ if (length <= LAZY_THRESHOLD) { |
+ valueOrLazySubstring = canonicalizedString(data.substring(start, end), |
+ canonicalize); |
ngeoffray
2013/10/18 10:19:37
indentation.
lukas
2013/10/24 16:48:36
Done.
|
+ } else { |
+ valueOrLazySubstring = |
+ new LazySubstring(data, start, length, canonicalize); |
+ } |
} |
- Iterator<int> get iterator => new StringCodeIterator(stringValue); |
+ /** |
+ * Creates a lazy string token. If [asciiOnly] is false, the byte array |
+ * is passed through a UTF-8 decoder. |
+ */ |
+ StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end, |
+ bool asciiOnly, int charOffset) |
+ : super(charOffset) { |
+ int length = end - start; |
+ if (length <= LAZY_THRESHOLD) { |
+ valueOrLazySubstring = decodeUtf8(data, start, end, asciiOnly); |
+ } else { |
+ valueOrLazySubstring = new LazySubstring(data, start, length, asciiOnly); |
+ } |
+ } |
- void printOn(StringBuffer sb) { |
- sb.write(stringValue); |
+ String get value { |
+ if (valueOrLazySubstring is String) { |
+ return valueOrLazySubstring; |
+ } else { |
+ assert(valueOrLazySubstring is LazySubstring); |
+ var data = valueOrLazySubstring.data; |
+ int start = valueOrLazySubstring.start; |
+ int end = start + valueOrLazySubstring.length; |
+ if (data is String) { |
+ valueOrLazySubstring = canonicalizedString( |
+ data.substring(start, end), valueOrLazySubstring.boolValue); |
+ } else { |
+ valueOrLazySubstring = decodeUtf8( |
+ data, start, end, valueOrLazySubstring.boolValue); |
+ } |
+ return valueOrLazySubstring; |
+ } |
} |
- String toString() => stringValue; |
+ String get stringValue => null; |
- String slowToString() => stringValue; |
+ bool isIdentifier() => identical(kind, IDENTIFIER_TOKEN); |
- SourceString copyWithoutQuotes(int initial, int terminal) { |
- assert(0 <= initial); |
- assert(0 <= terminal); |
- assert(initial + terminal <= stringValue.length); |
- return new StringWrapper( |
- stringValue.substring(initial, stringValue.length - terminal)); |
- } |
+ String toString() => "StringToken($value)"; |
- bool get isEmpty => stringValue.isEmpty; |
+ static final HashSet<String> canonicalizedSubstrings = |
+ new HashSet(); |
ngeoffray
2013/10/18 10:19:37
HashSet<String>()
lukas
2013/10/24 16:48:36
Done.
|
- bool isPrivate() => !isEmpty && stringValue.codeUnitAt(0) == $_; |
+ static String canonicalizedString(String s, bool canonicalize) { |
+ if (!canonicalize) return s; |
+ var result = canonicalizedSubstrings.lookup(s); |
+ if (result != null) return result; |
+ canonicalizedSubstrings.add(s); |
+ return s; |
+ } |
+ |
+ static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) { |
+ var s; |
+ if (asciiOnly) { |
+ // getRange returns a lazy iterable; it does not copy the data. |
+ s = new String.fromCharCodes(data.getRange(start, end)); |
+ } else { |
+ // TODO(lry): this is measurably slow. Also sublist is copied eagerly. |
+ var bytes = data.sublist(start, end); |
+ s = UTF8.decode(bytes); |
+ } |
+ return canonicalizedString(s, true); |
+ } |
} |
-class StringCodeIterator implements Iterator<int> { |
- final String string; |
- int index; |
- final int end; |
- int _current; |
+/** |
+ * This class represents the necessary information to compute a substring |
+ * lazily. The substring can either originate in a string or in a [:List<int>:] |
ngeoffray
2013/10/18 10:19:37
originate in -> originate from?
lukas
2013/10/24 16:48:36
Done.
|
+ * of UTF-8 bytes. |
+ */ |
+abstract class LazySubstring { |
+ /** The original data, either a string or a List<int> */ |
+ get data; |
- StringCodeIterator(String string) : |
- this.string = string, index = 0, end = string.length; |
+ int get start; |
+ int get length; |
- StringCodeIterator.substring(this.string, this.index, this.end) { |
- assert(0 <= index); |
- assert(index <= end); |
- assert(end <= string.length); |
+ /** |
+ * If this substring is based on a String, the boolean indicates whether the
ngeoffray
2013/10/18 10:19:37
the boolean -> [boolValue]
lukas
2013/10/24 16:48:36
Done.
|
+ * resulting substring should be canonicalized. |
+ * |
+ * For substrings based on a byte array, the boolean value is true if the |
ngeoffray
2013/10/18 10:19:37
ditto
lukas
2013/10/24 16:48:36
Done.
|
+ * array only holds ASCII characters. The resulting substring will be |
+ * canonicalized after decoding. |
+ */ |
+ bool get boolValue; |
+ |
+ LazySubstring.internal(); |
+ |
+ factory LazySubstring(data, int start, int length, bool b) { |
+ // See comment on [CompactLazySubstring]. |
+ if (start < 0x100000 && length < 0x200) { |
+ int fields = (start << 9); |
+ fields = fields | length; |
+ fields = fields << 1; |
+ if (b) fields |= 1; |
+ return new CompactLazySubstring(data, fields); |
+ } else { |
+ return new FullLazySubstring(data, start, length, b); |
+ } |
} |
+} |
- int get current => _current; |
+/** |
+ * This class encodes [start], [length] and [boolValue] in a single |
+ * 30 bit integer. It uses 20 bits for [start], which covers source files |
+ * of 1M. [length] has 9 bits, which covers substrings of up to 511 characters.
ngeoffray
2013/10/18 10:19:37
1M -> 1MB.
lukas
2013/10/24 16:48:36
Done.
|
+ * |
+ * The file html_dart2js.dart is currently around 1M. |
ngeoffray
2013/10/18 10:19:37
1M -> 1MB
lukas
2013/10/24 16:48:36
Done.
|
+ */ |
+class CompactLazySubstring extends LazySubstring { |
+ final data; |
+ final int fields; |
- bool moveNext() { |
- _current = null; |
- if (index >= end) return false; |
- _current = string.codeUnitAt(index++); |
- return true; |
- } |
+ CompactLazySubstring(this.data, this.fields) : super.internal(); |
+ |
+ int get start => fields >> 10; |
+ int get length => (fields >> 1) & 0x1ff; |
+ bool get boolValue => (fields & 1) == 1; |
} |
-class BeginGroupToken extends StringToken { |
- Token endGroup; |
- BeginGroupToken(PrecedenceInfo info, String value, int charOffset) |
- : super(info, value, charOffset); |
+class FullLazySubstring extends LazySubstring { |
+ final data; |
+ final int start; |
+ final int length; |
+ final bool boolValue; |
+ FullLazySubstring(this.data, this.start, this.length, this.boolValue) |
+ : super.internal(); |
} |
bool isUserDefinableOperator(String value) { |