// Copyright (c) 2011, the Dart project authors.  Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
| 4 |
library dart2js.tokens;

import 'dart:collection' show HashSet;
import 'dart:convert' show UTF8;

import '../common.dart';
import '../util/util.dart' show computeHashCode;
import 'keyword.dart' show Keyword;
import 'precedence.dart' show PrecedenceInfo;
import 'precedence_constants.dart' as Precedence show BAD_INPUT_INFO;
import 'token_constants.dart' as Tokens show IDENTIFIER_TOKEN;
| 16 |
/**
 * A token that doubles as a linked list.
 *
 * Every token records where it starts in the source text ([charOffset]) and
 * refers to its successor through [next].
 */
abstract class Token implements Spannable {
  /**
   * The character offset of the start of this token within the source text.
   */
  final int charOffset;

  Token(this.charOffset);

  /**
   * The next token in the token stream.
   */
  Token next;

  /**
   * The precedence info for this token. [info] determines the kind and the
   * precedence level of this token.
   *
   * Defined as getter to save a field in the [KeywordToken] subclass.
   */
  PrecedenceInfo get info;

  /**
   * The string represented by this token, a substring of the source code.
   *
   * For [StringToken]s the [value] includes the quotes, explicit escapes, etc.
   */
  String get value;

  /**
   * For symbol and keyword tokens, returns the string value represented by this
   * token. For [StringToken]s this method returns [:null:].
   *
   * For [SymbolToken]s and [KeywordToken]s, the string value is a compile-time
   * constant originating in the [PrecedenceInfo] or in the [Keyword] instance.
   * This allows testing for keywords and symbols using [:identical:], e.g.,
   * [:identical('class', token.value):].
   *
   * Note that returning [:null:] for string tokens is important to identify
   * symbols and keywords; we cannot use [value] instead. The string literal
   *   "$a($b"
   * produces ..., SymbolToken($), StringToken(a), StringToken((), ...
   *
   * After parsing the identifier 'a', the parser tests for a function
   * declaration using [:identical(next.stringValue, '('):], which (rightfully)
   * returns false because stringValue returns [:null:].
   */
  String get stringValue;

  /**
   * The kind enum of this token as determined by its [info].
   */
  int get kind => info.kind;

  /**
   * The precedence level for this token.
   */
  int get precedence => info.precedence;

  /**
   * True if this token is an identifier. Some keywords are allowed as
   * identifiers, see implementation in [KeywordToken].
   */
  bool isIdentifier();

  /**
   * Returns a textual representation of this token to be used for debugging
   * purposes. The resulting string might contain information about the
   * structure of the token, for example 'StringToken(foo)' for the identifier
   * token 'foo'.
   *
   * Use [value] for the text actually parsed by the token.
   */
  String toString();

  /**
   * The number of characters parsed by this token.
   */
  int get charCount {
    if (info == Precedence.BAD_INPUT_INFO) {
      // This is a token that wraps around an error message. Return 1
      // instead of the length of the error message.
      return 1;
    } else {
      return value.length;
    }
  }

  /// The character offset of the end of this token within the source text.
  int get charEnd => charOffset + charCount;

  /// A hash code combining [charOffset], [info] and [value].
  ///
  /// This getter reads [value], so a subclass whose [value] cannot be read
  /// must provide its own [hashCode].
  int get hashCode => computeHashCode(charOffset, info, value);
}
| 112 |
/// A pair of tokens marking the beginning and the end of a span. Use for error
/// reporting.
class TokenPair implements Spannable {
  /// The token at which the span begins.
  final Token begin;

  /// The token at which the span ends.
  final Token end;

  TokenPair(this.begin, this.end);
}
| 121 |
/**
 * A [SymbolToken] represents the symbol in its precedence info.
 * Also used for end of file with EOF_INFO.
 */
class SymbolToken extends Token {
  /// The precedence info whose [PrecedenceInfo.value] is this token's text.
  final PrecedenceInfo info;

  SymbolToken(this.info, int charOffset) : super(charOffset);

  /// The symbol text, taken from [info].
  String get value => info.value;

  /// Same string as [value]: for symbols the text comes straight from [info].
  String get stringValue => info.value;

  /// A symbol is never an identifier.
  bool isIdentifier() => false;

  String toString() => "SymbolToken($value)";
}
| 139 |
/**
 * A [BeginGroupToken] represents a symbol that may be the beginning of
 * a pair of brackets, i.e., ( { [ < or ${
 * The [endGroup] token points to the matching closing bracket in case
 * it can be identified during scanning.
 */
class BeginGroupToken extends SymbolToken {
  /// The matching closing bracket. The constructor leaves this unset; it is
  /// assigned from outside this class.
  Token endGroup;

  BeginGroupToken(PrecedenceInfo info, int charOffset)
      : super(info, charOffset);
}
| 152 |
/**
 * A keyword token.
 *
 * All token properties are derived from [keyword]; no separate precedence
 * info is stored.
 */
class KeywordToken extends Token {
  /// The keyword this token stands for.
  final Keyword keyword;

  KeywordToken(this.keyword, int charOffset) : super(charOffset);

  /// The precedence info of [keyword].
  PrecedenceInfo get info => keyword.info;

  /// The keyword's syntax string.
  String get value => keyword.syntax;

  /// Same string as [value].
  String get stringValue => keyword.syntax;

  /// True for pseudo and built-in keywords, which may be used as identifiers.
  bool isIdentifier() => keyword.isPseudo || keyword.isBuiltIn;

  String toString() => "KeywordToken($value)";
}
| 171 |
/**
 * Base class for tokens that stand for erroneous input.
 *
 * The [info] of every error token is [Precedence.BAD_INPUT_INFO]. Reading
 * [value] always throws a [SpannableAssertionFailure] carrying
 * [assertionMessage].
 */
abstract class ErrorToken extends Token {
  ErrorToken(int charOffset) : super(charOffset);

  PrecedenceInfo get info => Precedence.BAD_INPUT_INFO;

  /// Always throws: an error token has no source text to return.
  String get value {
    throw new SpannableAssertionFailure(this, assertionMessage);
  }

  String get stringValue => null;

  bool isIdentifier() => false;

  /// The message carried by the failure thrown from [value].
  String get assertionMessage;

  /// A hash code combining [charOffset], [info] and [assertionMessage].
  ///
  /// The inherited [Token.hashCode] reads [value], which throws for error
  /// tokens, so hashing an error token would throw without this override.
  int get hashCode => computeHashCode(charOffset, info, assertionMessage);
}
| 187 |
/// An error token for a single character that is not allowed at its position.
class BadInputToken extends ErrorToken {
  /// The code of the offending character; [assertionMessage] renders it in
  /// hexadecimal.
  final int character;

  BadInputToken(this.character, int charOffset) : super(charOffset);

  String toString() => "BadInputToken($character)";

  String get assertionMessage {
    return 'Character U+${character.toRadixString(16)} not allowed here.';
  }
}
| 199 |
/// An error token for a construct that is opened but never terminated.
class UnterminatedToken extends ErrorToken {
  /// The text that [assertionMessage] reports as not terminated.
  final String start;

  /// The character offset at which this token ends.
  final int endOffset;

  UnterminatedToken(this.start, int charOffset, this.endOffset)
      : super(charOffset);

  String toString() => "UnterminatedToken($start)";

  String get assertionMessage => "'$start' isn't terminated.";

  /// The distance from [charOffset] to [endOffset], instead of the fixed
  /// count of 1 that [Token.charCount] reports for bad-input tokens.
  int get charCount => endOffset - charOffset;
}
| 213 |
/// An error token for an opening bracket that has no matching closing bracket.
class UnmatchedToken extends ErrorToken {
  /// The opening bracket token that was left unmatched.
  final BeginGroupToken begin;

  /// Creates the error token at the same offset as [begin].
  UnmatchedToken(BeginGroupToken begin)
      : this.begin = begin,
        super(begin.charOffset);

  String toString() => "UnmatchedToken(${begin.value})";

  // Interpolate the bracket text ([begin.value]) rather than [begin] itself:
  // `$begin` would embed the debug form, e.g. "SymbolToken(()".
  String get assertionMessage => "'${begin.value}' isn't closed.";
}
| 225 |
/**
 * A String-valued token. Represents identifiers, string literals,
 * number literals, comments, and error tokens, using the corresponding
 * precedence info.
 */
class StringToken extends Token {
  /**
   * The length threshold above which substring tokens are computed lazily.
   *
   * For string tokens that are substrings of the program source, the actual
   * substring extraction is performed lazily. This is beneficial because
   * not all scanned code is actually used. For unused parts, the substrings
   * are never computed and allocated.
   */
  static const int LAZY_THRESHOLD = 4;

  /**
   * Either the token text (a [String]) or a [LazySubstring] describing how to
   * extract it. The first read of [value] replaces a [LazySubstring] with the
   * extracted [String].
   */
  var /* String | LazySubstring */ valueOrLazySubstring;

  final PrecedenceInfo info;

  /**
   * Creates a non-lazy string token. If [canonicalize] is true, the string
   * is canonicalized before the token is created.
   */
  StringToken.fromString(this.info, String value, int charOffset,
      {bool canonicalize: false})
      : valueOrLazySubstring = canonicalizedString(value, canonicalize),
        super(charOffset);

  /**
   * Creates a string token for the range [start]..[end] of [data].
   *
   * Ranges longer than [LAZY_THRESHOLD] are extracted lazily on the first
   * read of [value]; shorter ranges are extracted immediately. If
   * [canonicalize] is true, the extracted string is canonicalized.
   */
  StringToken.fromSubstring(
      this.info, String data, int start, int end, int charOffset,
      {bool canonicalize: false})
      : super(charOffset) {
    int length = end - start;
    if (length <= LAZY_THRESHOLD) {
      valueOrLazySubstring =
          canonicalizedString(data.substring(start, end), canonicalize);
    } else {
      valueOrLazySubstring =
          new LazySubstring(data, start, length, canonicalize);
    }
  }

  /**
   * Creates a string token for the range [start]..[end] of the bytes in
   * [data].
   *
   * Ranges longer than [LAZY_THRESHOLD] are decoded lazily on the first read
   * of [value]. If [asciiOnly] is false, the byte array is passed through a
   * UTF-8 decoder. The decoded string is always canonicalized.
   */
  StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end,
      bool asciiOnly, int charOffset)
      : super(charOffset) {
    int length = end - start;
    if (length <= LAZY_THRESHOLD) {
      valueOrLazySubstring = decodeUtf8(data, start, end, asciiOnly);
    } else {
      valueOrLazySubstring = new LazySubstring(data, start, length, asciiOnly);
    }
  }

  /**
   * The token text. If the text has not been extracted yet, it is extracted
   * now and cached in [valueOrLazySubstring].
   */
  String get value {
    if (valueOrLazySubstring is String) {
      return valueOrLazySubstring;
    } else {
      assert(valueOrLazySubstring is LazySubstring);
      // Keep the descriptor in a local: the field is overwritten below.
      var lazy = valueOrLazySubstring;
      var data = lazy.data;
      int start = lazy.start;
      int end = start + lazy.length;
      if (data is String) {
        // For string data, boolValue says whether to canonicalize.
        valueOrLazySubstring =
            canonicalizedString(data.substring(start, end), lazy.boolValue);
      } else {
        // For byte data, boolValue says whether the bytes are ASCII only.
        valueOrLazySubstring = decodeUtf8(data, start, end, lazy.boolValue);
      }
      return valueOrLazySubstring;
    }
  }

  /// See [Token.stringValue] for an explanation.
  String get stringValue => null;

  bool isIdentifier() => identical(kind, Tokens.IDENTIFIER_TOKEN);

  String toString() => "StringToken($value)";

  /// The strings handed out so far by [canonicalizedString].
  static final HashSet<String> canonicalizedSubstrings = new HashSet<String>();

  /**
   * Returns [s] unchanged if [canonicalize] is false. Otherwise returns the
   * string equal to [s] that is already in [canonicalizedSubstrings], adding
   * [s] to that set first if no equal string is present.
   */
  static String canonicalizedString(String s, bool canonicalize) {
    if (!canonicalize) return s;
    var result = canonicalizedSubstrings.lookup(s);
    if (result != null) return result;
    canonicalizedSubstrings.add(s);
    return s;
  }

  /**
   * Decodes the bytes [start]..[end] of [data] and canonicalizes the result.
   *
   * If [asciiOnly] is true the bytes are used directly as character codes;
   * otherwise they are passed through the UTF-8 decoder.
   */
  static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) {
    String s = asciiOnly
        ? new String.fromCharCodes(data, start, end)
        : UTF8.decoder.convert(data, start, end);
    return canonicalizedString(s, true);
  }
}
| 334 |
/**
 * This class represents the necessary information to compute a substring
 * lazily. The substring can either originate from a string or from
 * a [:List<int>:] of UTF-8 bytes.
 */
abstract class LazySubstring {
  /** The original data, either a string or a List<int> */
  get data;

  /// The offset in [data] at which the substring starts.
  int get start;

  /// The number of elements of [data] covered by the substring.
  int get length;

  /**
   * If this substring is based on a String, the [boolValue] indicates whether
   * the resulting substring should be canonicalized.
   *
   * For substrings based on a byte array, the [boolValue] is true if the
   * array only holds ASCII characters. The resulting substring will be
   * canonicalized after decoding.
   */
  bool get boolValue;

  LazySubstring.internal();

  /**
   * Creates a [CompactLazySubstring] when [start] is below 0x100000 (20 bits)
   * and [length] is below 0x200 (9 bits), and a [FullLazySubstring] otherwise.
   */
  factory LazySubstring(data, int start, int length, bool b) {
    if (start < 0x100000 && length < 0x200) {
      // Pack the three values into one integer: [start] in the high bits,
      // then 9 bits of [length], then [b] in the lowest bit.
      int fields = (start << 9);
      fields = fields | length;
      fields = fields << 1;
      if (b) fields |= 1;
      return new CompactLazySubstring(data, fields);
    } else {
      return new FullLazySubstring(data, start, length, b);
    }
  }
}
| 372 |
/**
 * This class encodes [start], [length] and [boolValue] in a single
 * 30 bit integer. It uses 20 bits for [start], which covers source files
 * of 1MB. [length] has 9 bits, which covers 512 characters.
 *
 * Bit 0 of [fields] holds [boolValue], bits 1 to 9 hold [length], and the
 * bits from 10 upwards hold [start].
 *
 * The file html_dart2js.dart is currently around 1MB.
 */
class CompactLazySubstring extends LazySubstring {
  final data;

  /// The packed [start], [length] and [boolValue].
  final int fields;

  CompactLazySubstring(this.data, this.fields) : super.internal();

  int get start => fields >> 10;
  int get length => (fields >> 1) & 0x1ff;
  bool get boolValue => (fields & 1) == 1;
}
| 390 |
/// A [LazySubstring] that stores [start], [length] and [boolValue] in
/// separate fields, without any packing.
class FullLazySubstring extends LazySubstring {
  final data;
  final int start;
  final int length;
  final bool boolValue;
  FullLazySubstring(this.data, this.start, this.length, this.boolValue)
      : super.internal();
}
| 399 |
/// Returns true if [value] is matched by any of [isBinaryOperator],
/// [isMinusOperator], [isTernaryOperator] or [isUnaryOperator].
bool isUserDefinableOperator(String value) {
  return isBinaryOperator(value) ||
      isMinusOperator(value) ||
      isTernaryOperator(value) ||
      isUnaryOperator(value);
}
| 406 |
/// Returns true if [value] is `~`.
bool isUnaryOperator(String value) => value == '~';
| 408 |
/// Returns true if [value] is one of the operators listed below.
///
/// `-` is not in this list; [isMinusOperator] tests for it separately.
bool isBinaryOperator(String value) {
  return value == '==' ||
      value == '[]' ||
      value == '*' ||
      value == '/' ||
      value == '%' ||
      value == '~/' ||
      value == '+' ||
      value == '<<' ||
      value == '>>' ||
      value == '>=' ||
      value == '>' ||
      value == '<=' ||
      value == '<' ||
      value == '&' ||
      value == '^' ||
      value == '|';
}
| 427 |
/// Returns true if [value] is `[]=`.
bool isTernaryOperator(String value) => value == '[]=';
| 429 |
/// Returns true if [value] is `-`.
bool isMinusOperator(String value) => value == '-';