Chromium Code Reviews

Side by Side Diff: pkg/analyzer/lib/src/dart/scanner/scanner.dart

Issue 2486873003: Move scanner into pkg/front_end/lib/src/scanner. (Closed)
Patch Set: Created 4 years, 1 month ago
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 library analyzer.src.dart.scanner.scanner; 5 library analyzer.src.dart.scanner.scanner;
6 6
7 import 'package:analyzer/dart/ast/token.dart';
8 import 'package:analyzer/error/error.dart'; 7 import 'package:analyzer/error/error.dart';
9 import 'package:analyzer/error/listener.dart'; 8 import 'package:analyzer/error/listener.dart';
10 import 'package:analyzer/src/dart/ast/token.dart';
11 import 'package:analyzer/src/dart/error/syntactic_errors.dart'; 9 import 'package:analyzer/src/dart/error/syntactic_errors.dart';
12 import 'package:analyzer/src/dart/scanner/reader.dart'; 10 import 'package:analyzer/src/dart/scanner/reader.dart';
13 import 'package:analyzer/src/generated/java_engine.dart';
14 import 'package:analyzer/src/generated/source.dart'; 11 import 'package:analyzer/src/generated/source.dart';
15 import 'package:charcode/ascii.dart'; 12 import 'package:front_end/src/scanner/scanner.dart' as fe;
16 13
17 export 'package:analyzer/src/dart/error/syntactic_errors.dart'; 14 export 'package:analyzer/src/dart/error/syntactic_errors.dart';
18 15 export 'package:front_end/src/scanner/scanner.dart' show KeywordState;
19 /**
20 * A state in a state machine used to scan keywords.
21 */
22 class KeywordState {
23 /**
24 * An empty transition table used by leaf states.
25 */
26 static List<KeywordState> _EMPTY_TABLE = new List<KeywordState>(26);
27
28 /**
29 * The initial state in the state machine.
30 */
31 static final KeywordState KEYWORD_STATE = _createKeywordStateTable();
32
33 /**
34 * A table mapping characters to the states to which those characters will
35 * transition. (The index into the array is the offset from the character
36 * `'a'` to the transitioning character.)
37 */
38 final List<KeywordState> _table;
39
40 /**
41 * The keyword that is recognized by this state, or `null` if this state is
42 * not a terminal state.
43 */
44 Keyword _keyword;
45
46 /**
47 * Initialize a newly created state to have the given transitions and to
48 * recognize the keyword with the given [syntax].
49 */
50 KeywordState(this._table, String syntax) {
51 this._keyword = (syntax == null) ? null : Keyword.keywords[syntax];
52 }
53
54 /**
55 * Return the keyword that was recognized by this state, or `null` if this
56 * state does not recognize a keyword.
57 */
58 Keyword keyword() => _keyword;
59
60 /**
61 * Return the state that follows this state on a transition of the given
62 * [character], or `null` if there is no valid state reachable from this state
63 * with such a transition.
64 */
65 KeywordState next(int character) => _table[character - $a];
66
67 /**
68 * Create the next state in the state machine where we have already recognized
69 * the subset of strings in the given array of [strings] starting at the given
70 * [offset] and having the given [length]. All of these strings have a common
71 * prefix and the next character is at the given [start] index.
72 */
73 static KeywordState _computeKeywordStateTable(
74 int start, List<String> strings, int offset, int length) {
75 List<KeywordState> result = new List<KeywordState>(26);
76 assert(length != 0);
77 int chunk = $nul;
78 int chunkStart = -1;
79 bool isLeaf = false;
80 for (int i = offset; i < offset + length; i++) {
81 if (strings[i].length == start) {
82 isLeaf = true;
83 }
84 if (strings[i].length > start) {
85 int c = strings[i].codeUnitAt(start);
86 if (chunk != c) {
87 if (chunkStart != -1) {
88 result[chunk - $a] = _computeKeywordStateTable(
89 start + 1, strings, chunkStart, i - chunkStart);
90 }
91 chunkStart = i;
92 chunk = c;
93 }
94 }
95 }
96 if (chunkStart != -1) {
97 assert(result[chunk - $a] == null);
98 result[chunk - $a] = _computeKeywordStateTable(
99 start + 1, strings, chunkStart, offset + length - chunkStart);
100 } else {
101 assert(length == 1);
102 return new KeywordState(_EMPTY_TABLE, strings[offset]);
103 }
104 if (isLeaf) {
105 return new KeywordState(result, strings[offset]);
106 } else {
107 return new KeywordState(result, null);
108 }
109 }
110
111 /**
112 * Create and return the initial state in the state machine.
113 */
114 static KeywordState _createKeywordStateTable() {
115 List<Keyword> values = Keyword.values;
116 List<String> strings = new List<String>(values.length);
117 for (int i = 0; i < values.length; i++) {
118 strings[i] = values[i].syntax;
119 }
120 strings.sort();
121 return _computeKeywordStateTable(0, strings, 0, strings.length);
122 }
123 }
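
The KeywordState machine above is a 26-way trie: every state owns a table indexed by `character - $a`, and terminal states remember the Keyword they spell. Below is a minimal, self-contained sketch of the same idea in the Dart 1.x style of this file; the names (_TrieNode, _buildTrie, _lookup) are illustrative only and are not analyzer API.

    class _TrieNode {
      // One slot per lowercase ASCII letter, indexed by (code unit - 'a').
      final List<_TrieNode> next = new List<_TrieNode>(26);
      String keyword; // non-null only when a keyword ends at this node
    }

    _TrieNode _buildTrie(List<String> keywords) {
      var root = new _TrieNode();
      for (var word in keywords) {
        var node = root;
        for (var i = 0; i < word.length; i++) {
          var index = word.codeUnitAt(i) - 0x61; // offset from 'a'
          node = node.next[index] ??= new _TrieNode();
        }
        node.keyword = word;
      }
      return root;
    }

    String _lookup(_TrieNode root, String lexeme) {
      var node = root;
      for (var i = 0; i < lexeme.length && node != null; i++) {
        var code = lexeme.codeUnitAt(i);
        if (code < 0x61 || code > 0x7a) return null; // lowercase ASCII only
        node = node.next[code - 0x61];
      }
      return node?.keyword; // null means the lexeme is just an identifier
    }

    void main() {
      var trie = _buildTrie(['class', 'const', 'continue', 'in', 'int']);
      print(_lookup(trie, 'const'));  // const
      print(_lookup(trie, 'in'));     // in (a keyword that prefixes 'int')
      print(_lookup(trie, 'consts')); // null
    }

As in the real table, child states exist only for letters that can continue some keyword.
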
124 16
125 /** 17 /**
126 * The class `Scanner` implements a scanner for Dart code. 18 * The class `Scanner` implements a scanner for Dart code.
127 * 19 *
128 * The lexical structure of Dart is ambiguous without knowledge of the context 20 * The lexical structure of Dart is ambiguous without knowledge of the context
129 * in which a token is being scanned. For example, without context we cannot 21 * in which a token is being scanned. For example, without context we cannot
130 * determine whether source of the form "<<" should be scanned as a single 22 * determine whether source of the form "<<" should be scanned as a single
131 * left-shift operator or as two left angle brackets. This scanner does not have 23 * left-shift operator or as two left angle brackets. This scanner does not have
132 * any context, so it always resolves such conflicts by scanning the longest 24 * any context, so it always resolves such conflicts by scanning the longest
133 * possible token. 25 * possible token.
134 */ 26 */
135 class Scanner { 27 class Scanner extends fe.Scanner {
136 /** 28 /**
137 * The source being scanned. 29 * The source being scanned.
138 */ 30 */
139 final Source source; 31 final Source source;
140 32
141 /** 33 /**
142 * The reader used to access the characters in the source.
143 */
144 final CharacterReader _reader;
145
146 /**
147 * The error listener that will be informed of any errors that are found 34 * The error listener that will be informed of any errors that are found
148 * during the scan. 35 * during the scan.
149 */ 36 */
150 final AnalysisErrorListener _errorListener; 37 final AnalysisErrorListener _errorListener;
151 38
152 /** 39 /**
153 * The flag specifying whether documentation comments should be parsed.
154 */
155 bool _preserveComments = true;
156
157 /**
158 * The token pointing to the head of the linked list of tokens.
159 */
160 Token _tokens;
161
162 /**
163 * The last token that was scanned.
164 */
165 Token _tail;
166
167 /**
168 * The first token in the list of comment tokens found since the last
169 * non-comment token.
170 */
171 Token _firstComment;
172
173 /**
174 * The last token in the list of comment tokens found since the last
175 * non-comment token.
176 */
177 Token _lastComment;
178
179 /**
180 * The index of the first character of the current token.
181 */
182 int _tokenStart = 0;
183
184 /**
185 * A list containing the offsets of the first character of each line in the
186 * source code.
187 */
188 List<int> _lineStarts = new List<int>();
189
190 /**
191 * A list, treated something like a stack, of tokens representing the
192 * beginning of a matched pair. It is used to pair the end tokens with the
193 * begin tokens.
194 */
195 List<BeginToken> _groupingStack = new List<BeginToken>();
196
197 /**
198 * The index of the last item in the [_groupingStack], or `-1` if the stack is
199 * empty.
200 */
201 int _stackEnd = -1;
202
203 /**
204 * A flag indicating whether any unmatched groups were found during the parse.
205 */
206 bool _hasUnmatchedGroups = false;
207
208 /**
209 * A flag indicating whether to parse generic method comments, of the form
210 * `/*=T*/` and `/*<T>*/`.
211 */
212 bool scanGenericMethodComments = false;
213
214 /**
215 * A flag indicating whether the lazy compound assignment operators '&&=' and
216 * '||=' are enabled.
217 */
218 bool scanLazyAssignmentOperators = false;
219
220 /**
221 * Initialize a newly created scanner to scan characters from the given 40 * Initialize a newly created scanner to scan characters from the given
222 * [source]. The given character [_reader] will be used to read the characters 41 * [source]. The given character [reader] will be used to read the characters
223 * in the source. The given [_errorListener] will be informed of any errors 42 * in the source. The given [_errorListener] will be informed of any errors
224 * that are found. 43 * that are found.
225 */ 44 */
226 Scanner(this.source, this._reader, this._errorListener) { 45 Scanner(this.source, CharacterReader reader, this._errorListener)
227 _tokens = new Token(TokenType.EOF, -1); 46 : super(reader);
228 _tokens.setNext(_tokens);
229 _tail = _tokens;
230 _tokenStart = -1;
231 _lineStarts.add(0);
232 }
233 47
234 /** 48 @override
235 * Return the first token in the token stream that was scanned. 49 void reportError(
236 */ 50 ScannerErrorCode errorCode, int offset, List<Object> arguments) {
237 Token get firstToken => _tokens.next; 51 _errorListener
238 52 .onError(new AnalysisError(source, offset, 1, errorCode, arguments));
239 /**
240 * Return `true` if any unmatched groups were found during the parse.
241 */
242 bool get hasUnmatchedGroups => _hasUnmatchedGroups;
243
244 /**
245 * Return an array containing the offsets of the first character of each line
246 * in the source code.
247 */
248 List<int> get lineStarts => _lineStarts;
249
250 /**
251 * Set whether documentation tokens should be preserved.
252 */
253 void set preserveComments(bool preserveComments) {
254 this._preserveComments = preserveComments;
255 }
256
257 /**
258 * Return the last token that was scanned.
259 */
260 Token get tail => _tail;
261
262 /**
263 * Append the given [token] to the end of the token stream being scanned. This
264 * method is intended to be used by subclasses that copy existing tokens and
265 * should not normally be used because it will fail to correctly associate any
266 * comments with the token being passed in.
267 */
268 void appendToken(Token token) {
269 _tail = _tail.setNext(token);
270 }
271
272 int bigSwitch(int next) {
273 _beginToken();
274 if (next == $cr) {
275 // '\r'
276 next = _reader.advance();
277 if (next == $lf) {
278 // '\n'
279 next = _reader.advance();
280 }
281 recordStartOfLine();
282 return next;
283 } else if (next == $lf) {
284 // '\n'
285 next = _reader.advance();
286 recordStartOfLine();
287 return next;
288 } else if (next == $tab || next == $space) {
289 // '\t' || ' '
290 return _reader.advance();
291 }
292 if (next == $r) {
293 // 'r'
294 int peek = _reader.peek();
295 if (peek == $double_quote || peek == $single_quote) {
296 // '"' || "'"
297 int start = _reader.offset;
298 return _tokenizeString(_reader.advance(), start, true);
299 }
300 }
301 if ($a <= next && next <= $z) {
302 // 'a'-'z'
303 return _tokenizeKeywordOrIdentifier(next, true);
304 }
305 if (($A <= next && next <= $Z) || next == $_ || next == $$) {
306 // 'A'-'Z' || '_' || '$'
307 return _tokenizeIdentifier(next, _reader.offset, true);
308 }
309 if (next == $lt) {
310 // '<'
311 return _tokenizeLessThan(next);
312 }
313 if (next == $gt) {
314 // '>'
315 return _tokenizeGreaterThan(next);
316 }
317 if (next == $equal) {
318 // '='
319 return _tokenizeEquals(next);
320 }
321 if (next == $exclamation) {
322 // '!'
323 return _tokenizeExclamation(next);
324 }
325 if (next == $plus) {
326 // '+'
327 return _tokenizePlus(next);
328 }
329 if (next == $minus) {
330 // '-'
331 return _tokenizeMinus(next);
332 }
333 if (next == $asterisk) {
334 // '*'
335 return _tokenizeMultiply(next);
336 }
337 if (next == $percent) {
338 // '%'
339 return _tokenizePercent(next);
340 }
341 if (next == $ampersand) {
342 // '&'
343 return _tokenizeAmpersand(next);
344 }
345 if (next == $bar) {
346 // '|'
347 return _tokenizeBar(next);
348 }
349 if (next == $caret) {
350 // '^'
351 return _tokenizeCaret(next);
352 }
353 if (next == $open_bracket) {
354 // '['
355 return _tokenizeOpenSquareBracket(next);
356 }
357 if (next == $tilde) {
358 // '~'
359 return _tokenizeTilde(next);
360 }
361 if (next == $backslash) {
362 // '\\'
363 _appendTokenOfType(TokenType.BACKSLASH);
364 return _reader.advance();
365 }
366 if (next == $hash) {
367 // '#'
368 return _tokenizeTag(next);
369 }
370 if (next == $open_paren) {
371 // '('
372 _appendBeginToken(TokenType.OPEN_PAREN);
373 return _reader.advance();
374 }
375 if (next == $close_paren) {
376 // ')'
377 _appendEndToken(TokenType.CLOSE_PAREN, TokenType.OPEN_PAREN);
378 return _reader.advance();
379 }
380 if (next == $comma) {
381 // ','
382 _appendTokenOfType(TokenType.COMMA);
383 return _reader.advance();
384 }
385 if (next == $colon) {
386 // ':'
387 _appendTokenOfType(TokenType.COLON);
388 return _reader.advance();
389 }
390 if (next == $semicolon) {
391 // ';'
392 _appendTokenOfType(TokenType.SEMICOLON);
393 return _reader.advance();
394 }
395 if (next == $question) {
396 // '?'
397 return _tokenizeQuestion();
398 }
399 if (next == $close_bracket) {
400 // ']'
401 _appendEndToken(
402 TokenType.CLOSE_SQUARE_BRACKET, TokenType.OPEN_SQUARE_BRACKET);
403 return _reader.advance();
404 }
405 if (next == $backquote) {
406 // '`'
407 _appendTokenOfType(TokenType.BACKPING);
408 return _reader.advance();
409 }
410 if (next == $lbrace) {
411 // '{'
412 _appendBeginToken(TokenType.OPEN_CURLY_BRACKET);
413 return _reader.advance();
414 }
415 if (next == $rbrace) {
416 // '}'
417 _appendEndToken(
418 TokenType.CLOSE_CURLY_BRACKET, TokenType.OPEN_CURLY_BRACKET);
419 return _reader.advance();
420 }
421 if (next == $slash) {
422 // '/'
423 return _tokenizeSlashOrComment(next);
424 }
425 if (next == $at) {
426 // '@'
427 _appendTokenOfType(TokenType.AT);
428 return _reader.advance();
429 }
430 if (next == $double_quote || next == $single_quote) {
431 // '"' || "'"
432 return _tokenizeString(next, _reader.offset, false);
433 }
434 if (next == $dot) {
435 // '.'
436 return _tokenizeDotOrNumber(next);
437 }
438 if (next == $0) {
439 // '0'
440 return _tokenizeHexOrNumber(next);
441 }
442 if ($1 <= next && next <= $9) {
443 // '1'-'9'
444 return _tokenizeNumber(next);
445 }
446 if (next == -1) {
447 // EOF
448 return -1;
449 }
450 _reportError(ScannerErrorCode.ILLEGAL_CHARACTER, [next]);
451 return _reader.advance();
452 }
453
454 /**
455 * Record the fact that we are at the beginning of a new line in the source.
456 */
457 void recordStartOfLine() {
458 _lineStarts.add(_reader.offset);
459 }
460
461 /**
462 * Record that the source begins on the given [line] and [column] at the
463 * current offset as given by the reader. Both the line and the column are
464 * one-based indexes. The line starts for lines before the given line will not
465 * be correct.
466 *
467 * This method must be invoked at most one time and must be invoked before
468 * scanning begins. The values provided must be sensible. The results are
469 * undefined if these conditions are violated.
470 */
471 void setSourceStart(int line, int column) {
472 int offset = _reader.offset;
473 if (line < 1 || column < 1 || offset < 0 || (line + column - 2) >= offset) {
474 return;
475 }
476 for (int i = 2; i < line; i++) {
477 _lineStarts.add(1);
478 }
479 _lineStarts.add(offset - column + 1);
480 }
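
A worked run of setSourceStart with hypothetical values, to make the offset arithmetic concrete (illustration only, not part of this patch):

    // Suppose the fragment being scanned starts at line 3, column 5, and the
    // reader is already positioned at offset 20.
    //   guard: line + column - 2 = 6, which is < 20, so the call proceeds
    //   the loop runs once (i = 2) and adds a placeholder start for line 2
    //   _lineStarts.add(20 - 5 + 1)   // line 3 is recorded as starting at 16
    // Only line 3 and later get accurate starts; earlier entries are the
    // placeholders the doc comment warns about.
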
481
482 /**
483 * Scan the source code to produce a list of tokens representing the source,
484 * and return the first token in the list of tokens that were produced.
485 */
486 Token tokenize() {
487 int next = _reader.advance();
488 while (next != -1) {
489 next = bigSwitch(next);
490 }
491 _appendEofToken();
492 return firstToken;
493 }
494
495 void _appendBeginToken(TokenType type) {
496 BeginToken token;
497 if (_firstComment == null) {
498 token = new BeginToken(type, _tokenStart);
499 } else {
500 token = new BeginTokenWithComment(type, _tokenStart, _firstComment);
501 _firstComment = null;
502 _lastComment = null;
503 }
504 _tail = _tail.setNext(token);
505 _groupingStack.add(token);
506 _stackEnd++;
507 }
508
509 void _appendCommentToken(TokenType type, String value) {
510 CommentToken token = null;
511 TokenType genericComment = _matchGenericMethodCommentType(value);
512 if (genericComment != null) {
513 token = new CommentToken(genericComment, value, _tokenStart);
514 } else if (!_preserveComments) {
515 // Ignore comment tokens if client specified that it doesn't need them.
516 return;
517 } else {
518 // OK, remember comment tokens.
519 if (_isDocumentationComment(value)) {
520 token = new DocumentationCommentToken(type, value, _tokenStart);
521 } else {
522 token = new CommentToken(type, value, _tokenStart);
523 }
524 }
525 if (_firstComment == null) {
526 _firstComment = token;
527 _lastComment = _firstComment;
528 } else {
529 _lastComment = _lastComment.setNext(token);
530 }
531 }
532
533 void _appendEndToken(TokenType type, TokenType beginType) {
534 Token token;
535 if (_firstComment == null) {
536 token = new Token(type, _tokenStart);
537 } else {
538 token = new TokenWithComment(type, _tokenStart, _firstComment);
539 _firstComment = null;
540 _lastComment = null;
541 }
542 _tail = _tail.setNext(token);
543 if (_stackEnd >= 0) {
544 BeginToken begin = _groupingStack[_stackEnd];
545 if (begin.type == beginType) {
546 begin.endToken = token;
547 _groupingStack.removeAt(_stackEnd--);
548 }
549 }
550 }
551
552 void _appendEofToken() {
553 Token eofToken;
554 if (_firstComment == null) {
555 eofToken = new Token(TokenType.EOF, _reader.offset + 1);
556 } else {
557 eofToken = new TokenWithComment(
558 TokenType.EOF, _reader.offset + 1, _firstComment);
559 _firstComment = null;
560 _lastComment = null;
561 }
562 // The EOF token points to itself so that there is always infinite
563 // look-ahead.
564 eofToken.setNext(eofToken);
565 _tail = _tail.setNext(eofToken);
566 if (_stackEnd >= 0) {
567 _hasUnmatchedGroups = true;
568 // TODO(brianwilkerson) Fix the ungrouped tokens?
569 }
570 }
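
The comment above explains that the EOF token is linked to itself so look-ahead can never run off the end of the stream. A small sketch of that invariant, assuming the public Token and TokenType API of this analyzer version (illustration only, not part of this patch):

    import 'package:analyzer/dart/ast/token.dart' show Token, TokenType;

    void main() {
      var eof = new Token(TokenType.EOF, 0);
      eof.setNext(eof); // the same self-link _appendEofToken creates
      assert(identical(eof, eof.next));
      assert(identical(eof, eof.next.next.next)); // unbounded look-ahead, never null
    }
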
571
572 void _appendKeywordToken(Keyword keyword) {
573 if (_firstComment == null) {
574 _tail = _tail.setNext(new KeywordToken(keyword, _tokenStart));
575 } else {
576 _tail = _tail.setNext(
577 new KeywordTokenWithComment(keyword, _tokenStart, _firstComment));
578 _firstComment = null;
579 _lastComment = null;
580 }
581 }
582
583 void _appendStringToken(TokenType type, String value) {
584 if (_firstComment == null) {
585 _tail = _tail.setNext(new StringToken(type, value, _tokenStart));
586 } else {
587 _tail = _tail.setNext(
588 new StringTokenWithComment(type, value, _tokenStart, _firstComment));
589 _firstComment = null;
590 _lastComment = null;
591 }
592 }
593
594 void _appendStringTokenWithOffset(TokenType type, String value, int offset) {
595 if (_firstComment == null) {
596 _tail = _tail.setNext(new StringToken(type, value, _tokenStart + offset));
597 } else {
598 _tail = _tail.setNext(new StringTokenWithComment(
599 type, value, _tokenStart + offset, _firstComment));
600 _firstComment = null;
601 _lastComment = null;
602 }
603 }
604
605 void _appendTokenOfType(TokenType type) {
606 if (_firstComment == null) {
607 _tail = _tail.setNext(new Token(type, _tokenStart));
608 } else {
609 _tail =
610 _tail.setNext(new TokenWithComment(type, _tokenStart, _firstComment));
611 _firstComment = null;
612 _lastComment = null;
613 }
614 }
615
616 void _appendTokenOfTypeWithOffset(TokenType type, int offset) {
617 if (_firstComment == null) {
618 _tail = _tail.setNext(new Token(type, offset));
619 } else {
620 _tail = _tail.setNext(new TokenWithComment(type, offset, _firstComment));
621 _firstComment = null;
622 _lastComment = null;
623 }
624 }
625
626 void _beginToken() {
627 _tokenStart = _reader.offset;
628 }
629
630 /**
631 * Return the beginning token corresponding to a closing brace that was found
632 * while scanning inside a string interpolation expression. Tokens that cannot
633 * be matched with the closing brace will be dropped from the stack.
634 */
635 BeginToken _findTokenMatchingClosingBraceInInterpolationExpression() {
636 while (_stackEnd >= 0) {
637 BeginToken begin = _groupingStack[_stackEnd];
638 if (begin.type == TokenType.OPEN_CURLY_BRACKET ||
639 begin.type == TokenType.STRING_INTERPOLATION_EXPRESSION) {
640 return begin;
641 }
642 _hasUnmatchedGroups = true;
643 _groupingStack.removeAt(_stackEnd--);
644 }
645 //
646 // We should never get to this point because we wouldn't be inside a string
647 // interpolation expression unless we had previously found the start of the
648 // expression.
649 //
650 return null;
651 }
652
653 /**
654 * Checks if [value] is the start of a generic method type annotation comment.
655 *
656 * This can either be of the form `/*<T>*/` or `/*=T*/`. The token type is
657 * returned, or null if it was not a generic method comment.
658 */
659 TokenType _matchGenericMethodCommentType(String value) {
660 if (scanGenericMethodComments) {
661 // Match /*< and >*/
662 if (StringUtilities.startsWith3(value, 0, $slash, $asterisk, $lt) &&
663 StringUtilities.endsWith3(value, $gt, $asterisk, $slash)) {
664 return TokenType.GENERIC_METHOD_TYPE_LIST;
665 }
666 // Match /*=
667 if (StringUtilities.startsWith3(value, 0, $slash, $asterisk, $equal)) {
668 return TokenType.GENERIC_METHOD_TYPE_ASSIGN;
669 }
670 }
671 return null;
672 }
673
674 /**
675 * Report an error at the current offset. The [errorCode] is the error code
676 * indicating the nature of the error. The [arguments] are any arguments
677 * needed to complete the error message
678 */
679 void _reportError(ScannerErrorCode errorCode, [List<Object> arguments]) {
680 _errorListener.onError(
681 new AnalysisError(source, _reader.offset, 1, errorCode, arguments));
682 }
683
684 int _select(int choice, TokenType yesType, TokenType noType) {
685 int next = _reader.advance();
686 if (next == choice) {
687 _appendTokenOfType(yesType);
688 return _reader.advance();
689 } else {
690 _appendTokenOfType(noType);
691 return next;
692 }
693 }
694
695 int _selectWithOffset(
696 int choice, TokenType yesType, TokenType noType, int offset) {
697 int next = _reader.advance();
698 if (next == choice) {
699 _appendTokenOfTypeWithOffset(yesType, offset);
700 return _reader.advance();
701 } else {
702 _appendTokenOfTypeWithOffset(noType, offset);
703 return next;
704 }
705 }
706
707 int _tokenizeAmpersand(int next) {
708 // &&= && &= &
709 next = _reader.advance();
710 if (next == $ampersand) {
711 next = _reader.advance();
712 if (scanLazyAssignmentOperators && next == $equal) {
713 _appendTokenOfType(TokenType.AMPERSAND_AMPERSAND_EQ);
714 return _reader.advance();
715 }
716 _appendTokenOfType(TokenType.AMPERSAND_AMPERSAND);
717 return next;
718 } else if (next == $equal) {
719 _appendTokenOfType(TokenType.AMPERSAND_EQ);
720 return _reader.advance();
721 } else {
722 _appendTokenOfType(TokenType.AMPERSAND);
723 return next;
724 }
725 }
726
727 int _tokenizeBar(int next) {
728 // ||= || |= |
729 next = _reader.advance();
730 if (next == $bar) {
731 next = _reader.advance();
732 if (scanLazyAssignmentOperators && next == $equal) {
733 _appendTokenOfType(TokenType.BAR_BAR_EQ);
734 return _reader.advance();
735 }
736 _appendTokenOfType(TokenType.BAR_BAR);
737 return next;
738 } else if (next == $equal) {
739 _appendTokenOfType(TokenType.BAR_EQ);
740 return _reader.advance();
741 } else {
742 _appendTokenOfType(TokenType.BAR);
743 return next;
744 }
745 }
746
747 int _tokenizeCaret(int next) =>
748 _select($equal, TokenType.CARET_EQ, TokenType.CARET);
749
750 int _tokenizeDotOrNumber(int next) {
751 int start = _reader.offset;
752 next = _reader.advance();
753 if ($0 <= next && next <= $9) {
754 return _tokenizeFractionPart(next, start);
755 } else if ($dot == next) {
756 return _select(
757 $dot, TokenType.PERIOD_PERIOD_PERIOD, TokenType.PERIOD_PERIOD);
758 } else {
759 _appendTokenOfType(TokenType.PERIOD);
760 return next;
761 }
762 }
763
764 int _tokenizeEquals(int next) {
765 // = == =>
766 next = _reader.advance();
767 if (next == $equal) {
768 _appendTokenOfType(TokenType.EQ_EQ);
769 return _reader.advance();
770 } else if (next == $gt) {
771 _appendTokenOfType(TokenType.FUNCTION);
772 return _reader.advance();
773 }
774 _appendTokenOfType(TokenType.EQ);
775 return next;
776 }
777
778 int _tokenizeExclamation(int next) {
779 // ! !=
780 next = _reader.advance();
781 if (next == $equal) {
782 _appendTokenOfType(TokenType.BANG_EQ);
783 return _reader.advance();
784 }
785 _appendTokenOfType(TokenType.BANG);
786 return next;
787 }
788
789 int _tokenizeExponent(int next) {
790 if (next == $plus || next == $minus) {
791 next = _reader.advance();
792 }
793 bool hasDigits = false;
794 while (true) {
795 if ($0 <= next && next <= $9) {
796 hasDigits = true;
797 } else {
798 if (!hasDigits) {
799 _reportError(ScannerErrorCode.MISSING_DIGIT);
800 }
801 return next;
802 }
803 next = _reader.advance();
804 }
805 }
806
807 int _tokenizeFractionPart(int next, int start) {
808 bool done = false;
809 bool hasDigit = false;
810 LOOP:
811 while (!done) {
812 if ($0 <= next && next <= $9) {
813 hasDigit = true;
814 } else if ($e == next || $E == next) {
815 hasDigit = true;
816 next = _tokenizeExponent(_reader.advance());
817 done = true;
818 continue LOOP;
819 } else {
820 done = true;
821 continue LOOP;
822 }
823 next = _reader.advance();
824 }
825 if (!hasDigit) {
826 _appendStringToken(TokenType.INT, _reader.getString(start, -2));
827 if ($dot == next) {
828 return _selectWithOffset($dot, TokenType.PERIOD_PERIOD_PERIOD,
829 TokenType.PERIOD_PERIOD, _reader.offset - 1);
830 }
831 _appendTokenOfTypeWithOffset(TokenType.PERIOD, _reader.offset - 1);
832 return bigSwitch(next);
833 }
834 _appendStringToken(
835 TokenType.DOUBLE, _reader.getString(start, next < 0 ? 0 : -1));
836 return next;
837 }
838
839 int _tokenizeGreaterThan(int next) {
840 // > >= >> >>=
841 next = _reader.advance();
842 if ($equal == next) {
843 _appendTokenOfType(TokenType.GT_EQ);
844 return _reader.advance();
845 } else if ($gt == next) {
846 next = _reader.advance();
847 if ($equal == next) {
848 _appendTokenOfType(TokenType.GT_GT_EQ);
849 return _reader.advance();
850 } else {
851 _appendTokenOfType(TokenType.GT_GT);
852 return next;
853 }
854 } else {
855 _appendTokenOfType(TokenType.GT);
856 return next;
857 }
858 }
859
860 int _tokenizeHex(int next) {
861 int start = _reader.offset - 1;
862 bool hasDigits = false;
863 while (true) {
864 next = _reader.advance();
865 if (($0 <= next && next <= $9) ||
866 ($A <= next && next <= $F) ||
867 ($a <= next && next <= $f)) {
868 hasDigits = true;
869 } else {
870 if (!hasDigits) {
871 _reportError(ScannerErrorCode.MISSING_HEX_DIGIT);
872 }
873 _appendStringToken(
874 TokenType.HEXADECIMAL, _reader.getString(start, next < 0 ? 0 : -1));
875 return next;
876 }
877 }
878 }
879
880 int _tokenizeHexOrNumber(int next) {
881 int x = _reader.peek();
882 if (x == $x || x == $X) {
883 _reader.advance();
884 return _tokenizeHex(x);
885 }
886 return _tokenizeNumber(next);
887 }
888
889 int _tokenizeIdentifier(int next, int start, bool allowDollar) {
890 while (($a <= next && next <= $z) ||
891 ($A <= next && next <= $Z) ||
892 ($0 <= next && next <= $9) ||
893 next == $_ ||
894 (next == $$ && allowDollar)) {
895 next = _reader.advance();
896 }
897 _appendStringToken(
898 TokenType.IDENTIFIER, _reader.getString(start, next < 0 ? 0 : -1));
899 return next;
900 }
901
902 int _tokenizeInterpolatedExpression(int next, int start) {
903 _appendBeginToken(TokenType.STRING_INTERPOLATION_EXPRESSION);
904 next = _reader.advance();
905 while (next != -1) {
906 if (next == $rbrace) {
907 BeginToken begin =
908 _findTokenMatchingClosingBraceInInterpolationExpression();
909 if (begin == null) {
910 _beginToken();
911 _appendTokenOfType(TokenType.CLOSE_CURLY_BRACKET);
912 next = _reader.advance();
913 _beginToken();
914 return next;
915 } else if (begin.type == TokenType.OPEN_CURLY_BRACKET) {
916 _beginToken();
917 _appendEndToken(
918 TokenType.CLOSE_CURLY_BRACKET, TokenType.OPEN_CURLY_BRACKET);
919 next = _reader.advance();
920 _beginToken();
921 } else if (begin.type == TokenType.STRING_INTERPOLATION_EXPRESSION) {
922 _beginToken();
923 _appendEndToken(TokenType.CLOSE_CURLY_BRACKET,
924 TokenType.STRING_INTERPOLATION_EXPRESSION);
925 next = _reader.advance();
926 _beginToken();
927 return next;
928 }
929 } else {
930 next = bigSwitch(next);
931 }
932 }
933 return next;
934 }
935
936 int _tokenizeInterpolatedIdentifier(int next, int start) {
937 _appendStringTokenWithOffset(
938 TokenType.STRING_INTERPOLATION_IDENTIFIER, "\$", 0);
939 if (($A <= next && next <= $Z) ||
940 ($a <= next && next <= $z) ||
941 next == $_) {
942 _beginToken();
943 next = _tokenizeKeywordOrIdentifier(next, false);
944 }
945 _beginToken();
946 return next;
947 }
948
949 int _tokenizeKeywordOrIdentifier(int next, bool allowDollar) {
950 KeywordState state = KeywordState.KEYWORD_STATE;
951 int start = _reader.offset;
952 while (state != null && $a <= next && next <= $z) {
953 state = state.next(next);
954 next = _reader.advance();
955 }
956 if (state == null || state.keyword() == null) {
957 return _tokenizeIdentifier(next, start, allowDollar);
958 }
959 if (($A <= next && next <= $Z) ||
960 ($0 <= next && next <= $9) ||
961 next == $_ ||
962 next == $$) {
963 return _tokenizeIdentifier(next, start, allowDollar);
964 } else if (next < 128) {
965 _appendKeywordToken(state.keyword());
966 return next;
967 } else {
968 return _tokenizeIdentifier(next, start, allowDollar);
969 }
970 }
971
972 int _tokenizeLessThan(int next) {
973 // < <= << <<=
974 next = _reader.advance();
975 if ($equal == next) {
976 _appendTokenOfType(TokenType.LT_EQ);
977 return _reader.advance();
978 } else if ($lt == next) {
979 return _select($equal, TokenType.LT_LT_EQ, TokenType.LT_LT);
980 } else {
981 _appendTokenOfType(TokenType.LT);
982 return next;
983 }
984 }
985
986 int _tokenizeMinus(int next) {
987 // - -- -=
988 next = _reader.advance();
989 if (next == $minus) {
990 _appendTokenOfType(TokenType.MINUS_MINUS);
991 return _reader.advance();
992 } else if (next == $equal) {
993 _appendTokenOfType(TokenType.MINUS_EQ);
994 return _reader.advance();
995 } else {
996 _appendTokenOfType(TokenType.MINUS);
997 return next;
998 }
999 }
1000
1001 int _tokenizeMultiLineComment(int next) {
1002 int nesting = 1;
1003 next = _reader.advance();
1004 while (true) {
1005 if (-1 == next) {
1006 _reportError(ScannerErrorCode.UNTERMINATED_MULTI_LINE_COMMENT);
1007 _appendCommentToken(
1008 TokenType.MULTI_LINE_COMMENT, _reader.getString(_tokenStart, 0));
1009 return next;
1010 } else if ($asterisk == next) {
1011 next = _reader.advance();
1012 if ($slash == next) {
1013 --nesting;
1014 if (0 == nesting) {
1015 _appendCommentToken(TokenType.MULTI_LINE_COMMENT,
1016 _reader.getString(_tokenStart, 0));
1017 return _reader.advance();
1018 } else {
1019 next = _reader.advance();
1020 }
1021 }
1022 } else if ($slash == next) {
1023 next = _reader.advance();
1024 if ($asterisk == next) {
1025 next = _reader.advance();
1026 ++nesting;
1027 }
1028 } else if (next == $cr) {
1029 next = _reader.advance();
1030 if (next == $lf) {
1031 next = _reader.advance();
1032 }
1033 recordStartOfLine();
1034 } else if (next == $lf) {
1035 next = _reader.advance();
1036 recordStartOfLine();
1037 } else {
1038 next = _reader.advance();
1039 }
1040 }
1041 }
1042
1043 int _tokenizeMultiLineRawString(int quoteChar, int start) {
1044 int next = _reader.advance();
1045 outer:
1046 while (next != -1) {
1047 while (next != quoteChar) {
1048 if (next == -1) {
1049 break outer;
1050 } else if (next == $cr) {
1051 next = _reader.advance();
1052 if (next == $lf) {
1053 next = _reader.advance();
1054 }
1055 recordStartOfLine();
1056 } else if (next == $lf) {
1057 next = _reader.advance();
1058 recordStartOfLine();
1059 } else {
1060 next = _reader.advance();
1061 }
1062 }
1063 next = _reader.advance();
1064 if (next == quoteChar) {
1065 next = _reader.advance();
1066 if (next == quoteChar) {
1067 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1068 return _reader.advance();
1069 }
1070 }
1071 }
1072 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1073 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1074 return _reader.advance();
1075 }
1076
1077 int _tokenizeMultiLineString(int quoteChar, int start, bool raw) {
1078 if (raw) {
1079 return _tokenizeMultiLineRawString(quoteChar, start);
1080 }
1081 int next = _reader.advance();
1082 while (next != -1) {
1083 if (next == $$) {
1084 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1085 next = _tokenizeStringInterpolation(start);
1086 _beginToken();
1087 start = _reader.offset;
1088 continue;
1089 }
1090 if (next == quoteChar) {
1091 next = _reader.advance();
1092 if (next == quoteChar) {
1093 next = _reader.advance();
1094 if (next == quoteChar) {
1095 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1096 return _reader.advance();
1097 }
1098 }
1099 continue;
1100 }
1101 if (next == $backslash) {
1102 next = _reader.advance();
1103 if (next == -1) {
1104 break;
1105 }
1106 if (next == $cr) {
1107 next = _reader.advance();
1108 if (next == $lf) {
1109 next = _reader.advance();
1110 }
1111 recordStartOfLine();
1112 } else if (next == $lf) {
1113 recordStartOfLine();
1114 next = _reader.advance();
1115 } else {
1116 next = _reader.advance();
1117 }
1118 } else if (next == $cr) {
1119 next = _reader.advance();
1120 if (next == $lf) {
1121 next = _reader.advance();
1122 }
1123 recordStartOfLine();
1124 } else if (next == $lf) {
1125 recordStartOfLine();
1126 next = _reader.advance();
1127 } else {
1128 next = _reader.advance();
1129 }
1130 }
1131 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1132 if (start == _reader.offset) {
1133 _appendStringTokenWithOffset(TokenType.STRING, "", 1);
1134 } else {
1135 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1136 }
1137 return _reader.advance();
1138 }
1139
1140 int _tokenizeMultiply(int next) =>
1141 _select($equal, TokenType.STAR_EQ, TokenType.STAR);
1142
1143 int _tokenizeNumber(int next) {
1144 int start = _reader.offset;
1145 while (true) {
1146 next = _reader.advance();
1147 if ($0 <= next && next <= $9) {
1148 continue;
1149 } else if (next == $dot) {
1150 return _tokenizeFractionPart(_reader.advance(), start);
1151 } else if (next == $e || next == $E) {
1152 return _tokenizeFractionPart(next, start);
1153 } else {
1154 _appendStringToken(
1155 TokenType.INT, _reader.getString(start, next < 0 ? 0 : -1));
1156 return next;
1157 }
1158 }
1159 }
1160
1161 int _tokenizeOpenSquareBracket(int next) {
1162 // [ [] []=
1163 next = _reader.advance();
1164 if (next == $close_bracket) {
1165 return _select($equal, TokenType.INDEX_EQ, TokenType.INDEX);
1166 } else {
1167 _appendBeginToken(TokenType.OPEN_SQUARE_BRACKET);
1168 return next;
1169 }
1170 }
1171
1172 int _tokenizePercent(int next) =>
1173 _select($equal, TokenType.PERCENT_EQ, TokenType.PERCENT);
1174
1175 int _tokenizePlus(int next) {
1176 // + ++ +=
1177 next = _reader.advance();
1178 if ($plus == next) {
1179 _appendTokenOfType(TokenType.PLUS_PLUS);
1180 return _reader.advance();
1181 } else if ($equal == next) {
1182 _appendTokenOfType(TokenType.PLUS_EQ);
1183 return _reader.advance();
1184 } else {
1185 _appendTokenOfType(TokenType.PLUS);
1186 return next;
1187 }
1188 }
1189
1190 int _tokenizeQuestion() {
1191 // ? ?. ?? ??=
1192 int next = _reader.advance();
1193 if (next == $dot) {
1194 // '.'
1195 _appendTokenOfType(TokenType.QUESTION_PERIOD);
1196 return _reader.advance();
1197 } else if (next == $question) {
1198 // '?'
1199 next = _reader.advance();
1200 if (next == $equal) {
1201 // '='
1202 _appendTokenOfType(TokenType.QUESTION_QUESTION_EQ);
1203 return _reader.advance();
1204 } else {
1205 _appendTokenOfType(TokenType.QUESTION_QUESTION);
1206 return next;
1207 }
1208 } else {
1209 _appendTokenOfType(TokenType.QUESTION);
1210 return next;
1211 }
1212 }
1213
1214 int _tokenizeSingleLineComment(int next) {
1215 while (true) {
1216 next = _reader.advance();
1217 if (-1 == next) {
1218 _appendCommentToken(
1219 TokenType.SINGLE_LINE_COMMENT, _reader.getString(_tokenStart, 0));
1220 return next;
1221 } else if ($lf == next || $cr == next) {
1222 _appendCommentToken(
1223 TokenType.SINGLE_LINE_COMMENT, _reader.getString(_tokenStart, -1));
1224 return next;
1225 }
1226 }
1227 }
1228
1229 int _tokenizeSingleLineRawString(int next, int quoteChar, int start) {
1230 next = _reader.advance();
1231 while (next != -1) {
1232 if (next == quoteChar) {
1233 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1234 return _reader.advance();
1235 } else if (next == $cr || next == $lf) {
1236 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1237 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1238 return _reader.advance();
1239 }
1240 next = _reader.advance();
1241 }
1242 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1243 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1244 return _reader.advance();
1245 }
1246
1247 int _tokenizeSingleLineString(int next, int quoteChar, int start) {
1248 while (next != quoteChar) {
1249 if (next == $backslash) {
1250 next = _reader.advance();
1251 } else if (next == $$) {
1252 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1253 next = _tokenizeStringInterpolation(start);
1254 _beginToken();
1255 start = _reader.offset;
1256 continue;
1257 }
1258 if (next <= $cr && (next == $lf || next == $cr || next == -1)) {
1259 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1260 if (start == _reader.offset) {
1261 _appendStringTokenWithOffset(TokenType.STRING, "", 1);
1262 } else if (next == -1) {
1263 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1264 } else {
1265 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1266 }
1267 return _reader.advance();
1268 }
1269 next = _reader.advance();
1270 }
1271 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1272 return _reader.advance();
1273 }
1274
1275 int _tokenizeSlashOrComment(int next) {
1276 next = _reader.advance();
1277 if ($asterisk == next) {
1278 return _tokenizeMultiLineComment(next);
1279 } else if ($slash == next) {
1280 return _tokenizeSingleLineComment(next);
1281 } else if ($equal == next) {
1282 _appendTokenOfType(TokenType.SLASH_EQ);
1283 return _reader.advance();
1284 } else {
1285 _appendTokenOfType(TokenType.SLASH);
1286 return next;
1287 }
1288 }
1289
1290 int _tokenizeString(int next, int start, bool raw) {
1291 int quoteChar = next;
1292 next = _reader.advance();
1293 if (quoteChar == next) {
1294 next = _reader.advance();
1295 if (quoteChar == next) {
1296 // Multiline string.
1297 return _tokenizeMultiLineString(quoteChar, start, raw);
1298 } else {
1299 // Empty string.
1300 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1301 return next;
1302 }
1303 }
1304 if (raw) {
1305 return _tokenizeSingleLineRawString(next, quoteChar, start);
1306 } else {
1307 return _tokenizeSingleLineString(next, quoteChar, start);
1308 }
1309 }
1310
1311 int _tokenizeStringInterpolation(int start) {
1312 _beginToken();
1313 int next = _reader.advance();
1314 if (next == $lbrace) {
1315 return _tokenizeInterpolatedExpression(next, start);
1316 } else {
1317 return _tokenizeInterpolatedIdentifier(next, start);
1318 }
1319 }
1320
1321 int _tokenizeTag(int next) {
1322 // # or #!.*[\n\r]
1323 if (_reader.offset == 0) {
1324 if (_reader.peek() == $exclamation) {
1325 do {
1326 next = _reader.advance();
1327 } while (next != $lf && next != $cr && next > 0);
1328 _appendStringToken(
1329 TokenType.SCRIPT_TAG, _reader.getString(_tokenStart, 0));
1330 return next;
1331 }
1332 }
1333 _appendTokenOfType(TokenType.HASH);
1334 return _reader.advance();
1335 }
1336
1337 int _tokenizeTilde(int next) {
1338 // ~ ~/ ~/=
1339 next = _reader.advance();
1340 if (next == $slash) {
1341 return _select($equal, TokenType.TILDE_SLASH_EQ, TokenType.TILDE_SLASH);
1342 } else {
1343 _appendTokenOfType(TokenType.TILDE);
1344 return next;
1345 }
1346 }
1347
1348 /**
1349 * Checks if [value] is a single-line or multi-line comment.
1350 */
1351 static bool _isDocumentationComment(String value) {
1352 return StringUtilities.startsWith3(value, 0, $slash, $slash, $slash) ||
1353 StringUtilities.startsWith3(value, 0, $slash, $asterisk, $asterisk);
1354 } 53 }
1355 } 54 }
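
For orientation, a minimal sketch of driving the refactored Scanner end to end. The constructor shape and tokenize() come from the diff above; the use of CharSequenceReader, AnalysisErrorListener.NULL_LISTENER, and a null Source follows common analyzer test style of the time and is an assumption here, not part of the patch. The '<<' input echoes the class comment: with no context the scanner takes the longest match, so it is scanned as a single LT_LT token rather than two angle brackets.

    import 'package:analyzer/dart/ast/token.dart' show TokenType;
    import 'package:analyzer/error/listener.dart' show AnalysisErrorListener;
    import 'package:analyzer/src/dart/scanner/reader.dart' show CharSequenceReader;
    import 'package:analyzer/src/dart/scanner/scanner.dart' show Scanner;

    void main() {
      var scanner = new Scanner(null, new CharSequenceReader('a << b'),
          AnalysisErrorListener.NULL_LISTENER);
      for (var token = scanner.tokenize();
          token.type != TokenType.EOF;
          token = token.next) {
        print(token.lexeme); // prints: a, <<, b  (one token per line)
      }
    }
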