pkg/dart_scanner/lib/src/token.dart - Issue 2621153006: Copy scanner and parser to own packages.

Side by Side Diff: pkg/dart_scanner/lib/src/token.dart

Issue 2621153006: Copy scanner and parser to own packages. (Closed)

Patch Set: Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file

	2 // for details. All rights reserved. Use of this source code is governed by a

	3 // BSD-style license that can be found in the LICENSE file.

	4

	5 library dart2js.tokens;

	6

	7 import 'dart:collection' show HashSet;

	8 import 'dart:convert' show UTF8;

	9

	10 import '../common.dart';

	11 import '../util/util.dart' show computeHashCode;

	12 import 'keyword.dart' show Keyword;

	13 import 'precedence.dart' show PrecedenceInfo;

	14 import 'precedence_constants.dart' as Precedence show BAD_INPUT_INFO;

	15 import 'token_constants.dart' as Tokens show IDENTIFIER_TOKEN;

	16

	17 /**

	18 * A token that doubles as a linked list.

	19 */

	20 abstract class Token implements Spannable {

	21 /**

	22 * The character offset of the start of this token within the source text.

	23 */

	24 final int charOffset;

	25

	26 Token(this.charOffset);

	27

	28 /**

	29 * The next token in the token stream.

	30 */

	31 Token next;

	32

	33 /**

	34 * The precedence info for this token. [info] determines the kind and the

	35 * precedence level of this token.

	36 *

	37 * Defined as getter to save a field in the [KeywordToken] subclass.

	38 */

	39 PrecedenceInfo get info;

	40

	41 /**

	42 * The string represented by this token, a substring of the source code.

	43 *

	44 * For [StringToken]s the [value] includes the quotes, explicit escapes, etc.

	45 */

	46 String get value;

	47

	48 /**

	49 * For symbol and keyword tokens, returns the string value represented by this

	50 * token. For [StringToken]s this method returns [:null:].

	51 *

	52 * For [SymbolToken]s and [KeywordToken]s, the string value is a compile-time

	53 * constant originating in the [PrecedenceInfo] or in the [Keyword] instance.

	54 * This allows testing for keywords and symbols using [:identical:], e.g.,

	55 * [:identical('class', token.value):].

	56 *

	57 * Note that returning [:null:] for string tokens is important to identify

	58 * symbols and keywords, we cannot use [value] instead. The string literal

	59 * "$a($b"

	60 * produces ..., SymbolToken($), StringToken(a), StringToken((), ...

	61 *

	62 * After parsing the identifier 'a', the parser tests for a function

	63 * declaration using [:identical(next.stringValue, '('):], which (rightfully)

	64 * returns false because stringValue returns [:null:].

	65 */

	66 String get stringValue;

	67

	68 /**

	69 * The kind enum of this token as determined by its [info].

	70 */

	71 int get kind => info.kind;

	72

	73 /**

	74 * The precedence level for this token.

	75 */

	76 int get precedence => info.precedence;

	77

	78 /**

	79 * True if this token is an identifier. Some keywords allowed as identifiers,

	80 * see implementation in [KeywordToken].

	81 */

	82 bool isIdentifier();

	83

	84 /**

	85 * Returns a textual representation of this token to be used for debugging

	86 * purposes. The resulting string might contain information about the

	87 * structure of the token, for example 'StringToken(foo)' for the identifier

	88 * token 'foo'.

	89 *

	90 * Use [value] for the text actually parsed by the token.

	91 */

	92 String toString();

	93

	94 /**

	95 * The number of characters parsed by this token.

	96 */

	97 int get charCount {

	98 if (info == Precedence.BAD_INPUT_INFO) {

	99 // This is a token that wraps around an error message. Return 1

	100 // instead of the size of the length of the error message.

	101 return 1;

	102 } else {

	103 return value.length;

	104 }

	105 }

	106

	107 /// The character offset of the end of this token within the source text.

	108 int get charEnd => charOffset + charCount;

	109

	110 int get hashCode => computeHashCode(charOffset, info, value);

	111 }

	112

	113 /// A pair of tokens marking the beginning and the end of a span. Use for error

	114 /// reporting.

	115 class TokenPair implements Spannable {

	116 final Token begin;

	117 final Token end;

	118

	119 TokenPair(this.begin, this.end);

	120 }

	121

	122 /**

	123 * A [SymbolToken] represents the symbol in its precendence info.

	124 * Also used for end of file with EOF_INFO.

	125 */

	126 class SymbolToken extends Token {

	127 final PrecedenceInfo info;

	128

	129 SymbolToken(this.info, int charOffset) : super(charOffset);

	130

	131 String get value => info.value;

	132

	133 String get stringValue => info.value;

	134

	135 bool isIdentifier() => false;

	136

	137 String toString() => "SymbolToken($value)";

	138 }

	139

	140 /**

	141 * A [BeginGroupToken] represents a symbol that may be the beginning of

	142 * a pair of brackets, i.e., ( { [ < or ${

	143 * The [endGroup] token points to the matching closing bracked in case

	144 * it can be identified during scanning.

	145 */

	146 class BeginGroupToken extends SymbolToken {

	147 Token endGroup;

	148

	149 BeginGroupToken(PrecedenceInfo info, int charOffset)

	150 : super(info, charOffset);

	151 }

	152

	153 /**

	154 * A keyword token.

	155 */

	156 class KeywordToken extends Token {

	157 final Keyword keyword;

	158

	159 KeywordToken(this.keyword, int charOffset) : super(charOffset);

	160

	161 PrecedenceInfo get info => keyword.info;

	162

	163 String get value => keyword.syntax;

	164

	165 String get stringValue => keyword.syntax;

	166

	167 bool isIdentifier() => keyword.isPseudo \|\| keyword.isBuiltIn;

	168

	169 String toString() => "KeywordToken($value)";

	170 }

	171

	172 abstract class ErrorToken extends Token {

	173 ErrorToken(int charOffset) : super(charOffset);

	174

	175 PrecedenceInfo get info => Precedence.BAD_INPUT_INFO;

	176

	177 String get value {

	178 throw new SpannableAssertionFailure(this, assertionMessage);

	179 }

	180

	181 String get stringValue => null;

	182

	183 bool isIdentifier() => false;

	184

	185 String get assertionMessage;

	186 }

	187

	188 class BadInputToken extends ErrorToken {

	189 final int character;

	190

	191 BadInputToken(this.character, int charOffset) : super(charOffset);

	192

	193 String toString() => "BadInputToken($character)";

	194

	195 String get assertionMessage {

	196 return 'Character U+${character.toRadixString(16)} not allowed here.';

	197 }

	198 }

	199

	200 class UnterminatedToken extends ErrorToken {

	201 final String start;

	202 final int endOffset;

	203

	204 UnterminatedToken(this.start, int charOffset, this.endOffset)

	205 : super(charOffset);

	206

	207 String toString() => "UnterminatedToken($start)";

	208

	209 String get assertionMessage => "'$start' isn't terminated.";

	210

	211 int get charCount => endOffset - charOffset;

	212 }

	213

	214 class UnmatchedToken extends ErrorToken {

	215 final BeginGroupToken begin;

	216

	217 UnmatchedToken(BeginGroupToken begin)

	218 : this.begin = begin,

	219 super(begin.charOffset);

	220

	221 String toString() => "UnmatchedToken(${begin.value})";

	222

	223 String get assertionMessage => "'$begin' isn't closed.";

	224 }

	225

	226 /**

	227 * A String-valued token. Represents identifiers, string literals,

	228 * number literals, comments, and error tokens, using the corresponding

	229 * precedence info.

	230 */

	231 class StringToken extends Token {

	232 /**

	233 * The length threshold above which substring tokens are computed lazily.

	234 *

	235 * For string tokens that are substrings of the program source, the actual

	236 * substring extraction is performed lazily. This is beneficial because

	237 * not all scanned code is actually used. For unused parts, the substrings

	238 * are never computed and allocated.

	239 */

	240 static const int LAZY_THRESHOLD = 4;

	241

	242 var /* String \| LazySubtring */ valueOrLazySubstring;

	243

	244 final PrecedenceInfo info;

	245

	246 /**

	247 * Creates a non-lazy string token. If [canonicalize] is true, the string

	248 * is canonicalized before the token is created.

	249 */

	250 StringToken.fromString(this.info, String value, int charOffset,

	251 {bool canonicalize: false})

	252 : valueOrLazySubstring = canonicalizedString(value, canonicalize),

	253 super(charOffset);

	254

	255 /**

	256 * Creates a lazy string token. If [canonicalize] is true, the string

	257 * is canonicalized before the token is created.

	258 */

	259 StringToken.fromSubstring(

	260 this.info, String data, int start, int end, int charOffset,

	261 {bool canonicalize: false})

	262 : super(charOffset) {

	263 int length = end - start;

	264 if (length <= LAZY_THRESHOLD) {

	265 valueOrLazySubstring =

	266 canonicalizedString(data.substring(start, end), canonicalize);

	267 } else {

	268 valueOrLazySubstring =

	269 new LazySubstring(data, start, length, canonicalize);

	270 }

	271 }

	272

	273 /**

	274 * Creates a lazy string token. If [asciiOnly] is false, the byte array

	275 * is passed through a UTF-8 decoder.

	276 */

	277 StringToken.fromUtf8Bytes(this.info, List<int> data, int start, int end,

	278 bool asciiOnly, int charOffset)

	279 : super(charOffset) {

	280 int length = end - start;

	281 if (length <= LAZY_THRESHOLD) {

	282 valueOrLazySubstring = decodeUtf8(data, start, end, asciiOnly);

	283 } else {

	284 valueOrLazySubstring = new LazySubstring(data, start, length, asciiOnly);

	285 }

	286 }

	287

	288 String get value {

	289 if (valueOrLazySubstring is String) {

	290 return valueOrLazySubstring;

	291 } else {

	292 assert(valueOrLazySubstring is LazySubstring);

	293 var data = valueOrLazySubstring.data;

	294 int start = valueOrLazySubstring.start;

	295 int end = start + valueOrLazySubstring.length;

	296 if (data is String) {

	297 valueOrLazySubstring = canonicalizedString(

	298 data.substring(start, end), valueOrLazySubstring.boolValue);

	299 } else {

	300 valueOrLazySubstring =

	301 decodeUtf8(data, start, end, valueOrLazySubstring.boolValue);

	302 }

	303 return valueOrLazySubstring;

	304 }

	305 }

	306

	307 /// See [Token.stringValue] for an explanation.

	308 String get stringValue => null;

	309

	310 bool isIdentifier() => identical(kind, Tokens.IDENTIFIER_TOKEN);

	311

	312 String toString() => "StringToken($value)";

	313

	314 static final HashSet<String> canonicalizedSubstrings = new HashSet<String>();

	315

	316 static String canonicalizedString(String s, bool canonicalize) {

	317 if (!canonicalize) return s;

	318 var result = canonicalizedSubstrings.lookup(s);

	319 if (result != null) return result;

	320 canonicalizedSubstrings.add(s);

	321 return s;

	322 }

	323

	324 static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) {

	325 var s;

	326 if (asciiOnly) {

	327 s = new String.fromCharCodes(data, start, end);

	328 } else {

	329 s = UTF8.decoder.convert(data, start, end);

	330 }

	331 return canonicalizedString(s, true);

	332 }

	333 }

	334

	335 /**

	336 * This class represents the necessary information to compute a substring

	337 * lazily. The substring can either originate from a string or from

	338 * a [:List<int>:] of UTF-8 bytes.

	339 */

	340 abstract class LazySubstring {

	341 /** The original data, either a string or a List<int> */

	342 get data;

	343

	344 int get start;

	345 int get length;

	346

	347 /**

	348 * If this substring is based on a String, the [boolValue] indicates wheter

	349 * the resulting substring should be canonicalized.

	350 *

	351 * For substrings based on a byte array, the [boolValue] is true if the

	352 * array only holds ASCII characters. The resulting substring will be

	353 * canonicalized after decoding.

	354 */

	355 bool get boolValue;

	356

	357 LazySubstring.internal();

	358

	359 factory LazySubstring(data, int start, int length, bool b) {

	360 // See comment on [CompactLazySubstring].

	361 if (start < 0x100000 && length < 0x200) {

	362 int fields = (start << 9);

	363 fields = fields \| length;

	364 fields = fields << 1;

	365 if (b) fields \|= 1;

	366 return new CompactLazySubstring(data, fields);

	367 } else {

	368 return new FullLazySubstring(data, start, length, b);

	369 }

	370 }

	371 }

	372

	373 /**

	374 * This class encodes [start], [length] and [boolValue] in a single

	375 * 30 bit integer. It uses 20 bits for [start], which covers source files

	376 * of 1MB. [length] has 9 bits, which covers 512 characters.

	377 *

	378 * The file html_dart2js.dart is currently around 1MB.

	379 */

	380 class CompactLazySubstring extends LazySubstring {

	381 final data;

	382 final int fields;

	383

	384 CompactLazySubstring(this.data, this.fields) : super.internal();

	385

	386 int get start => fields >> 10;

	387 int get length => (fields >> 1) & 0x1ff;

	388 bool get boolValue => (fields & 1) == 1;

	389 }

	390

	391 class FullLazySubstring extends LazySubstring {

	392 final data;

	393 final int start;

	394 final int length;

	395 final bool boolValue;

	396 FullLazySubstring(this.data, this.start, this.length, this.boolValue)

	397 : super.internal();

	398 }

	399

	400 bool isUserDefinableOperator(String value) {

	401 return isBinaryOperator(value) \|\|

	402 isMinusOperator(value) \|\|

	403 isTernaryOperator(value) \|\|

	404 isUnaryOperator(value);

	405 }

	406

	407 bool isUnaryOperator(String value) => value == '~';

	408

	409 bool isBinaryOperator(String value) {

	410 return value == '==' \|\|

	411 value == '[]' \|\|

	412 value == '*' \|\|

	413 value == '/' \|\|

	414 value == '%' \|\|

	415 value == '~/' \|\|

	416 value == '+' \|\|

	417 value == '<<' \|\|

	418 value == '>>' \|\|

	419 value == '>=' \|\|

	420 value == '>' \|\|

	421 value == '<=' \|\|

	422 value == '<' \|\|

	423 value == '&' \|\|

	424 value == '^' \|\|

	425 value == '\|';

	426 }

	427

	428 bool isTernaryOperator(String value) => value == '[]=';

	429

	430 bool isMinusOperator(String value) => value == '-';

OLD	NEW

« no previous file with comments | « pkg/dart_scanner/lib/src/precedence.dart ('k') | pkg/dart_scanner/lib/src/token_constants.dart » ('j') | no next file with comments »