Chromium Code Reviews

Side by Side Diff: pkg/analyzer/lib/src/dart/scanner/scanner.dart

Issue 2486873003: Move scanner into pkg/front_end/lib/src/scanner. (Closed)
Patch Set: Created 4 years, 1 month ago
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 library analyzer.src.dart.scanner.scanner; 5 library analyzer.src.dart.scanner.scanner;
6 6
7 import 'package:analyzer/dart/ast/token.dart';
8 import 'package:analyzer/error/error.dart'; 7 import 'package:analyzer/error/error.dart';
9 import 'package:analyzer/error/listener.dart'; 8 import 'package:analyzer/error/listener.dart';
10 import 'package:analyzer/src/dart/ast/token.dart';
11 import 'package:analyzer/src/dart/error/syntactic_errors.dart'; 9 import 'package:analyzer/src/dart/error/syntactic_errors.dart';
12 import 'package:analyzer/src/dart/scanner/reader.dart'; 10 import 'package:analyzer/src/dart/scanner/reader.dart';
13 import 'package:analyzer/src/generated/java_engine.dart';
14 import 'package:analyzer/src/generated/source.dart'; 11 import 'package:analyzer/src/generated/source.dart';
15 import 'package:charcode/ascii.dart'; 12 import 'package:front_end/src/scanner/scanner.dart' as fe;
16 13
17 export 'package:analyzer/src/dart/error/syntactic_errors.dart'; 14 export 'package:analyzer/src/dart/error/syntactic_errors.dart';
18 15 export 'package:front_end/src/scanner/scanner.dart' show KeywordState;
19 /**
20 * A state in a state machine used to scan keywords.
21 */
22 class KeywordState {
23 /**
24 * An empty transition table used by leaf states.
25 */
26 static List<KeywordState> _EMPTY_TABLE = new List<KeywordState>(26);
27
28 /**
29 * The initial state in the state machine.
30 */
31 static final KeywordState KEYWORD_STATE = _createKeywordStateTable();
32
33 /**
34 * A table mapping characters to the states to which those characters will
35 * transition. (The index into the array is the offset from the character
36 * `'a'` to the transitioning character.)
37 */
38 final List<KeywordState> _table;
39
40 /**
41 * The keyword that is recognized by this state, or `null` if this state is
42 * not a terminal state.
43 */
44 Keyword _keyword;
45
46 /**
47 * Initialize a newly created state to have the given transitions and to
48 * recognize the keyword with the given [syntax].
49 */
50 KeywordState(this._table, String syntax) {
51 this._keyword = (syntax == null) ? null : Keyword.keywords[syntax];
52 }
53
54 /**
55 * Return the keyword that was recognized by this state, or `null` if this
56 * state does not recognize a keyword.
57 */
58 Keyword keyword() => _keyword;
59
60 /**
61 * Return the state that follows this state on a transition of the given
62 * [character], or `null` if there is no valid state reachable from this state
63 * with such a transition.
64 */
65 KeywordState next(int character) => _table[character - $a];
66
67 /**
68 * Create the next state in the state machine where we have already recognized
69 * the subset of strings in the given array of [strings] starting at the given
70 * [offset] and having the given [length]. All of these strings have a common
71 * prefix and the next character is at the given [start] index.
72 */
73 static KeywordState _computeKeywordStateTable(
74 int start, List<String> strings, int offset, int length) {
75 List<KeywordState> result = new List<KeywordState>(26);
76 assert(length != 0);
77 int chunk = $nul;
78 int chunkStart = -1;
79 bool isLeaf = false;
80 for (int i = offset; i < offset + length; i++) {
81 if (strings[i].length == start) {
82 isLeaf = true;
83 }
84 if (strings[i].length > start) {
85 int c = strings[i].codeUnitAt(start);
86 if (chunk != c) {
87 if (chunkStart != -1) {
88 result[chunk - $a] = _computeKeywordStateTable(
89 start + 1, strings, chunkStart, i - chunkStart);
90 }
91 chunkStart = i;
92 chunk = c;
93 }
94 }
95 }
96 if (chunkStart != -1) {
97 assert(result[chunk - $a] == null);
98 result[chunk - $a] = _computeKeywordStateTable(
99 start + 1, strings, chunkStart, offset + length - chunkStart);
100 } else {
101 assert(length == 1);
102 return new KeywordState(_EMPTY_TABLE, strings[offset]);
103 }
104 if (isLeaf) {
105 return new KeywordState(result, strings[offset]);
106 } else {
107 return new KeywordState(result, null);
108 }
109 }
110
111 /**
112 * Create and return the initial state in the state machine.
113 */
114 static KeywordState _createKeywordStateTable() {
115 List<Keyword> values = Keyword.values;
116 List<String> strings = new List<String>(values.length);
117 for (int i = 0; i < values.length; i++) {
118 strings[i] = values[i].syntax;
119 }
120 strings.sort();
121 return _computeKeywordStateTable(0, strings, 0, strings.length);
122 }
123 }
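
The KeywordState machine above is a 26-way trie: every state owns a table indexed by `character - $a`, and terminal states remember the Keyword they spell. Below is a minimal, self-contained sketch of the same idea in the Dart 1.x style of this file; the names (_TrieNode, _buildTrie, _lookup) are illustrative only and are not analyzer API.

    class _TrieNode {
      // One slot per lowercase ASCII letter, indexed by (code unit - 'a').
      final List<_TrieNode> next = new List<_TrieNode>(26);
      String keyword; // non-null only when a keyword ends at this node
    }

    _TrieNode _buildTrie(List<String> keywords) {
      var root = new _TrieNode();
      for (var word in keywords) {
        var node = root;
        for (var i = 0; i < word.length; i++) {
          var index = word.codeUnitAt(i) - 0x61; // offset from 'a'
          node = node.next[index] ??= new _TrieNode();
        }
        node.keyword = word;
      }
      return root;
    }

    String _lookup(_TrieNode root, String lexeme) {
      var node = root;
      for (var i = 0; i < lexeme.length && node != null; i++) {
        var code = lexeme.codeUnitAt(i);
        if (code < 0x61 || code > 0x7a) return null; // lowercase ASCII only
        node = node.next[code - 0x61];
      }
      return node?.keyword; // null means the lexeme is just an identifier
    }

    void main() {
      var trie = _buildTrie(['class', 'const', 'continue', 'in', 'int']);
      print(_lookup(trie, 'const'));  // const
      print(_lookup(trie, 'in'));     // in (a keyword that prefixes 'int')
      print(_lookup(trie, 'consts')); // null
    }

As in the real table, child states exist only for letters that can continue some keyword.
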
124 16
125 /** 17 /**
126 * The class `Scanner` implements a scanner for Dart code. 18 * The class `Scanner` implements a scanner for Dart code.
127 * 19 *
128 * The lexical structure of Dart is ambiguous without knowledge of the context 20 * The lexical structure of Dart is ambiguous without knowledge of the context
129 * in which a token is being scanned. For example, without context we cannot 21 * in which a token is being scanned. For example, without context we cannot
130 * determine whether source of the form "<<" should be scanned as a single 22 * determine whether source of the form "<<" should be scanned as a single
131 * left-shift operator or as two left angle brackets. This scanner does not have 23 * left-shift operator or as two left angle brackets. This scanner does not have
132 * any context, so it always resolves such conflicts by scanning the longest 24 * any context, so it always resolves such conflicts by scanning the longest
133 * possible token. 25 * possible token.
134 */ 26 */
135 class Scanner { 27 class Scanner extends fe.Scanner {
136 /** 28 /**
137 * The source being scanned. 29 * The source being scanned.
138 */ 30 */
139 final Source source; 31 final Source source;
140 32
141 /** 33 /**
142 * The reader used to access the characters in the source.
143 */
144 final CharacterReader _reader;
145
146 /**
147 * The error listener that will be informed of any errors that are found 34 * The error listener that will be informed of any errors that are found
148 * during the scan. 35 * during the scan.
149 */ 36 */
150 final AnalysisErrorListener _errorListener; 37 final AnalysisErrorListener _errorListener;
151 38
152 /** 39 /**
153 * The flag specifying whether documentation comments should be parsed.
154 */
155 bool _preserveComments = true;
156
157 /**
158 * The token pointing to the head of the linked list of tokens.
159 */
160 Token _tokens;
161
162 /**
163 * The last token that was scanned.
164 */
165 Token _tail;
166
167 /**
168 * The first token in the list of comment tokens found since the last
169 * non-comment token.
170 */
171 Token _firstComment;
172
173 /**
174 * The last token in the list of comment tokens found since the last
175 * non-comment token.
176 */
177 Token _lastComment;
178
179 /**
180 * The index of the first character of the current token.
181 */
182 int _tokenStart = 0;
183
184 /**
185 * A list containing the offsets of the first character of each line in the
186 * source code.
187 */
188 List<int> _lineStarts = new List<int>();
189
190 /**
191 * A list, treated something like a stack, of tokens representing the
192 * beginning of a matched pair. It is used to pair the end tokens with the
193 * begin tokens.
194 */
195 List<BeginToken> _groupingStack = new List<BeginToken>();
196
197 /**
198 * The index of the last item in the [_groupingStack], or `-1` if the stack is
199 * empty.
200 */
201 int _stackEnd = -1;
202
203 /**
204 * A flag indicating whether any unmatched groups were found during the parse.
205 */
206 bool _hasUnmatchedGroups = false;
207
208 /**
209 * A flag indicating whether to parse generic method comments, of the form
210 * `/*=T*/` and `/*<T>*/`.
211 */
212 bool scanGenericMethodComments = false;
213
214 /**
215 * A flag indicating whether the lazy compound assignment operators '&&=' and
216 * '||=' are enabled.
217 */
218 bool scanLazyAssignmentOperators = false;
219
220 /**
221 * Initialize a newly created scanner to scan characters from the given 40 * Initialize a newly created scanner to scan characters from the given
222 * [source]. The given character [_reader] will be used to read the characters 41 * [source]. The given character [reader] will be used to read the characters
223 * in the source. The given [_errorListener] will be informed of any errors 42 * in the source. The given [_errorListener] will be informed of any errors
224 * that are found. 43 * that are found.
225 */ 44 */
226 Scanner(this.source, this._reader, this._errorListener) { 45 Scanner(this.source, CharacterReader reader, this._errorListener)
227 _tokens = new Token(TokenType.EOF, -1); 46 : super(reader);
228 _tokens.setNext(_tokens);
229 _tail = _tokens;
230 _tokenStart = -1;
231 _lineStarts.add(0);
232 }
233 47
234 /** 48 @override
235 * Return the first token in the token stream that was scanned. 49 void reportError(
236 */ 50 ScannerErrorCode errorCode, int offset, List<Object> arguments) {
237 Token get firstToken => _tokens.next; 51 _errorListener
238 52 .onError(new AnalysisError(source, offset, 1, errorCode, arguments));
239 /**
240 * Return `true` if any unmatched groups were found during the parse.
241 */
242 bool get hasUnmatchedGroups => _hasUnmatchedGroups;
243
244 /**
245 * Return an array containing the offsets of the first character of each line
246 * in the source code.
247 */
248 List<int> get lineStarts => _lineStarts;
249
250 /**
251 * Set whether documentation tokens should be preserved.
252 */
253 void set preserveComments(bool preserveComments) {
254 this._preserveComments = preserveComments;
255 }
256
257 /**
258 * Return the last token that was scanned.
259 */
260 Token get tail => _tail;
261
262 /**
263 * Append the given [token] to the end of the token stream being scanned. This
264 * method is intended to be used by subclasses that copy existing tokens and
265 * should not normally be used because it will fail to correctly associate any
266 * comments with the token being passed in.
267 */
268 void appendToken(Token token) {
269 _tail = _tail.setNext(token);
270 }
271
272 int bigSwitch(int next) {
273 _beginToken();
274 if (next == $cr) {
275 // '\r'
276 next = _reader.advance();
277 if (next == $lf) {
278 // '\n'
279 next = _reader.advance();
280 }
281 recordStartOfLine();
282 return next;
283 } else if (next == $lf) {
284 // '\n'
285 next = _reader.advance();
286 recordStartOfLine();
287 return next;
288 } else if (next == $tab || next == $space) {
289 // '\t' || ' '
290 return _reader.advance();
291 }
292 if (next == $r) {
293 // 'r'
294 int peek = _reader.peek();
295 if (peek == $double_quote || peek == $single_quote) {
296 // '"' || "'"
297 int start = _reader.offset;
298 return _tokenizeString(_reader.advance(), start, true);
299 }
300 }
301 if ($a <= next && next <= $z) {
302 // 'a'-'z'
303 return _tokenizeKeywordOrIdentifier(next, true);
304 }
305 if (($A <= next && next <= $Z) || next == $_ || next == $$) {
306 // 'A'-'Z' || '_' || '$'
307 return _tokenizeIdentifier(next, _reader.offset, true);
308 }
309 if (next == $lt) {
310 // '<'
311 return _tokenizeLessThan(next);
312 }
313 if (next == $gt) {
314 // '>'
315 return _tokenizeGreaterThan(next);
316 }
317 if (next == $equal) {
318 // '='
319 return _tokenizeEquals(next);
320 }
321 if (next == $exclamation) {
322 // '!'
323 return _tokenizeExclamation(next);
324 }
325 if (next == $plus) {
326 // '+'
327 return _tokenizePlus(next);
328 }
329 if (next == $minus) {
330 // '-'
331 return _tokenizeMinus(next);
332 }
333 if (next == $asterisk) {
334 // '*'
335 return _tokenizeMultiply(next);
336 }
337 if (next == $percent) {
338 // '%'
339 return _tokenizePercent(next);
340 }
341 if (next == $ampersand) {
342 // '&'
343 return _tokenizeAmpersand(next);
344 }
345 if (next == $bar) {
346 // '|'
347 return _tokenizeBar(next);
348 }
349 if (next == $caret) {
350 // '^'
351 return _tokenizeCaret(next);
352 }
353 if (next == $open_bracket) {
354 // '['
355 return _tokenizeOpenSquareBracket(next);
356 }
357 if (next == $tilde) {
358 // '~'
359 return _tokenizeTilde(next);
360 }
361 if (next == $backslash) {
362 // '\\'
363 _appendTokenOfType(TokenType.BACKSLASH);
364 return _reader.advance();
365 }
366 if (next == $hash) {
367 // '#'
368 return _tokenizeTag(next);
369 }
370 if (next == $open_paren) {
371 // '('
372 _appendBeginToken(TokenType.OPEN_PAREN);
373 return _reader.advance();
374 }
375 if (next == $close_paren) {
376 // ')'
377 _appendEndToken(TokenType.CLOSE_PAREN, TokenType.OPEN_PAREN);
378 return _reader.advance();
379 }
380 if (next == $comma) {
381 // ','
382 _appendTokenOfType(TokenType.COMMA);
383 return _reader.advance();
384 }
385 if (next == $colon) {
386 // ':'
387 _appendTokenOfType(TokenType.COLON);
388 return _reader.advance();
389 }
390 if (next == $semicolon) {
391 // ';'
392 _appendTokenOfType(TokenType.SEMICOLON);
393 return _reader.advance();
394 }
395 if (next == $question) {
396 // '?'
397 return _tokenizeQuestion();
398 }
399 if (next == $close_bracket) {
400 // ']'
401 _appendEndToken(
402 TokenType.CLOSE_SQUARE_BRACKET, TokenType.OPEN_SQUARE_BRACKET);
403 return _reader.advance();
404 }
405 if (next == $backquote) {
406 // '`'
407 _appendTokenOfType(TokenType.BACKPING);
408 return _reader.advance();
409 }
410 if (next == $lbrace) {
411 // '{'
412 _appendBeginToken(TokenType.OPEN_CURLY_BRACKET);
413 return _reader.advance();
414 }
415 if (next == $rbrace) {
416 // '}'
417 _appendEndToken(
418 TokenType.CLOSE_CURLY_BRACKET, TokenType.OPEN_CURLY_BRACKET);
419 return _reader.advance();
420 }
421 if (next == $slash) {
422 // '/'
423 return _tokenizeSlashOrComment(next);
424 }
425 if (next == $at) {
426 // '@'
427 _appendTokenOfType(TokenType.AT);
428 return _reader.advance();
429 }
430 if (next == $double_quote || next == $single_quote) {
431 // '"' || "'"
432 return _tokenizeString(next, _reader.offset, false);
433 }
434 if (next == $dot) {
435 // '.'
436 return _tokenizeDotOrNumber(next);
437 }
438 if (next == $0) {
439 // '0'
440 return _tokenizeHexOrNumber(next);
441 }
442 if ($1 <= next && next <= $9) {
443 // '1'-'9'
444 return _tokenizeNumber(next);
445 }
446 if (next == -1) {
447 // EOF
448 return -1;
449 }
450 _reportError(ScannerErrorCode.ILLEGAL_CHARACTER, [next]);
451 return _reader.advance();
452 }
453
454 /**
455 * Record the fact that we are at the beginning of a new line in the source.
456 */
457 void recordStartOfLine() {
458 _lineStarts.add(_reader.offset);
459 }
460
461 /**
462 * Record that the source begins on the given [line] and [column] at the
463 * current offset as given by the reader. Both the line and the column are
464 * one-based indexes. The line starts for lines before the given line will not
465 * be correct.
466 *
467 * This method must be invoked at most one time and must be invoked before
468 * scanning begins. The values provided must be sensible. The results are
469 * undefined if these conditions are violated.
470 */
471 void setSourceStart(int line, int column) {
472 int offset = _reader.offset;
473 if (line < 1 || column < 1 || offset < 0 || (line + column - 2) >= offset) {
474 return;
475 }
476 for (int i = 2; i < line; i++) {
477 _lineStarts.add(1);
478 }
479 _lineStarts.add(offset - column + 1);
480 }
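
A worked run of setSourceStart with hypothetical values, to make the offset arithmetic concrete (illustration only, not part of this patch):

    // Suppose the fragment being scanned starts at line 3, column 5, and the
    // reader is already positioned at offset 20.
    //   guard: line + column - 2 = 6, which is < 20, so the call proceeds
    //   the loop runs once (i = 2) and adds a placeholder start for line 2
    //   _lineStarts.add(20 - 5 + 1)   // line 3 is recorded as starting at 16
    // Only line 3 and later get accurate starts; earlier entries are the
    // placeholders the doc comment warns about.
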
481
482 /**
483 * Scan the source code to produce a list of tokens representing the source,
484 * and return the first token in the list of tokens that were produced.
485 */
486 Token tokenize() {
487 int next = _reader.advance();
488 while (next != -1) {
489 next = bigSwitch(next);
490 }
491 _appendEofToken();
492 return firstToken;
493 }
494
495 void _appendBeginToken(TokenType type) {
496 BeginToken token;
497 if (_firstComment == null) {
498 token = new BeginToken(type, _tokenStart);
499 } else {
500 token = new BeginTokenWithComment(type, _tokenStart, _firstComment);
501 _firstComment = null;
502 _lastComment = null;
503 }
504 _tail = _tail.setNext(token);
505 _groupingStack.add(token);
506 _stackEnd++;
507 }
508
509 void _appendCommentToken(TokenType type, String value) {
510 CommentToken token = null;
511 TokenType genericComment = _matchGenericMethodCommentType(value);
512 if (genericComment != null) {
513 token = new CommentToken(genericComment, value, _tokenStart);
514 } else if (!_preserveComments) {
515 // Ignore comment tokens if client specified that it doesn't need them.
516 return;
517 } else {
518 // OK, remember comment tokens.
519 if (_isDocumentationComment(value)) {
520 token = new DocumentationCommentToken(type, value, _tokenStart);
521 } else {
522 token = new CommentToken(type, value, _tokenStart);
523 }
524 }
525 if (_firstComment == null) {
526 _firstComment = token;
527 _lastComment = _firstComment;
528 } else {
529 _lastComment = _lastComment.setNext(token);
530 }
531 }
532
533 void _appendEndToken(TokenType type, TokenType beginType) {
534 Token token;
535 if (_firstComment == null) {
536 token = new Token(type, _tokenStart);
537 } else {
538 token = new TokenWithComment(type, _tokenStart, _firstComment);
539 _firstComment = null;
540 _lastComment = null;
541 }
542 _tail = _tail.setNext(token);
543 if (_stackEnd >= 0) {
544 BeginToken begin = _groupingStack[_stackEnd];
545 if (begin.type == beginType) {
546 begin.endToken = token;
547 _groupingStack.removeAt(_stackEnd--);
548 }
549 }
550 }
551
552 void _appendEofToken() {
553 Token eofToken;
554 if (_firstComment == null) {
555 eofToken = new Token(TokenType.EOF, _reader.offset + 1);
556 } else {
557 eofToken = new TokenWithComment(
558 TokenType.EOF, _reader.offset + 1, _firstComment);
559 _firstComment = null;
560 _lastComment = null;
561 }
562 // The EOF token points to itself so that there is always infinite
563 // look-ahead.
564 eofToken.setNext(eofToken);
565 _tail = _tail.setNext(eofToken);
566 if (_stackEnd >= 0) {
567 _hasUnmatchedGroups = true;
568 // TODO(brianwilkerson) Fix the ungrouped tokens?
569 }
570 }
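
The comment above explains that the EOF token is linked to itself so look-ahead can never run off the end of the stream. A small sketch of that invariant, assuming the public Token and TokenType API of this analyzer version (illustration only, not part of this patch):

    import 'package:analyzer/dart/ast/token.dart' show Token, TokenType;

    void main() {
      var eof = new Token(TokenType.EOF, 0);
      eof.setNext(eof); // the same self-link _appendEofToken creates
      assert(identical(eof, eof.next));
      assert(identical(eof, eof.next.next.next)); // unbounded look-ahead, never null
    }
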
571
572 void _appendKeywordToken(Keyword keyword) {
573 if (_firstComment == null) {
574 _tail = _tail.setNext(new KeywordToken(keyword, _tokenStart));
575 } else {
576 _tail = _tail.setNext(
577 new KeywordTokenWithComment(keyword, _tokenStart, _firstComment));
578 _firstComment = null;
579 _lastComment = null;
580 }
581 }
582
583 void _appendStringToken(TokenType type, String value) {
584 if (_firstComment == null) {
585 _tail = _tail.setNext(new StringToken(type, value, _tokenStart));
586 } else {
587 _tail = _tail.setNext(
588 new StringTokenWithComment(type, value, _tokenStart, _firstComment));
589 _firstComment = null;
590 _lastComment = null;
591 }
592 }
593
594 void _appendStringTokenWithOffset(TokenType type, String value, int offset) {
595 if (_firstComment == null) {
596 _tail = _tail.setNext(new StringToken(type, value, _tokenStart + offset));
597 } else {
598 _tail = _tail.setNext(new StringTokenWithComment(
599 type, value, _tokenStart + offset, _firstComment));
600 _firstComment = null;
601 _lastComment = null;
602 }
603 }
604
605 void _appendTokenOfType(TokenType type) {
606 if (_firstComment == null) {
607 _tail = _tail.setNext(new Token(type, _tokenStart));
608 } else {
609 _tail =
610 _tail.setNext(new TokenWithComment(type, _tokenStart, _firstComment));
611 _firstComment = null;
612 _lastComment = null;
613 }
614 }
615
616 void _appendTokenOfTypeWithOffset(TokenType type, int offset) {
617 if (_firstComment == null) {
618 _tail = _tail.setNext(new Token(type, offset));
619 } else {
620 _tail = _tail.setNext(new TokenWithComment(type, offset, _firstComment));
621 _firstComment = null;
622 _lastComment = null;
623 }
624 }
625
626 void _beginToken() {
627 _tokenStart = _reader.offset;
628 }
629
630 /**
631 * Return the beginning token corresponding to a closing brace that was found
632 * while scanning inside a string interpolation expression. Tokens that cannot
633 * be matched with the closing brace will be dropped from the stack.
634 */
635 BeginToken _findTokenMatchingClosingBraceInInterpolationExpression() {
636 while (_stackEnd >= 0) {
637 BeginToken begin = _groupingStack[_stackEnd];
638 if (begin.type == TokenType.OPEN_CURLY_BRACKET ||
639 begin.type == TokenType.STRING_INTERPOLATION_EXPRESSION) {
640 return begin;
641 }
642 _hasUnmatchedGroups = true;
643 _groupingStack.removeAt(_stackEnd--);
644 }
645 //
646 // We should never get to this point because we wouldn't be inside a string
647 // interpolation expression unless we had previously found the start of the
648 // expression.
649 //
650 return null;
651 }
652
653 /**
654 * Checks if [value] is the start of a generic method type annotation comment.
655 *
656 * This can either be of the form `/*<T>*/` or `/*=T*/`. The token type is
657 * returned, or null if it was not a generic method comment.
658 */
659 TokenType _matchGenericMethodCommentType(String value) {
660 if (scanGenericMethodComments) {
661 // Match /*< and >*/
662 if (StringUtilities.startsWith3(value, 0, $slash, $asterisk, $lt) &&
663 StringUtilities.endsWith3(value, $gt, $asterisk, $slash)) {
664 return TokenType.GENERIC_METHOD_TYPE_LIST;
665 }
666 // Match /*=
667 if (StringUtilities.startsWith3(value, 0, $slash, $asterisk, $equal)) {
668 return TokenType.GENERIC_METHOD_TYPE_ASSIGN;
669 }
670 }
671 return null;
672 }
673
674 /**
675 * Report an error at the current offset. The [errorCode] is the error code
676 * indicating the nature of the error. The [arguments] are any arguments
677 * needed to complete the error message
678 */
679 void _reportError(ScannerErrorCode errorCode, [List<Object> arguments]) {
680 _errorListener.onError(
681 new AnalysisError(source, _reader.offset, 1, errorCode, arguments));
682 }
683
684 int _select(int choice, TokenType yesType, TokenType noType) {
685 int next = _reader.advance();
686 if (next == choice) {
687 _appendTokenOfType(yesType);
688 return _reader.advance();
689 } else {
690 _appendTokenOfType(noType);
691 return next;
692 }
693 }
694
695 int _selectWithOffset(
696 int choice, TokenType yesType, TokenType noType, int offset) {
697 int next = _reader.advance();
698 if (next == choice) {
699 _appendTokenOfTypeWithOffset(yesType, offset);
700 return _reader.advance();
701 } else {
702 _appendTokenOfTypeWithOffset(noType, offset);
703 return next;
704 }
705 }
706
707 int _tokenizeAmpersand(int next) {
708 // &&= && &= &
709 next = _reader.advance();
710 if (next == $ampersand) {
711 next = _reader.advance();
712 if (scanLazyAssignmentOperators && next == $equal) {
713 _appendTokenOfType(TokenType.AMPERSAND_AMPERSAND_EQ);
714 return _reader.advance();
715 }
716 _appendTokenOfType(TokenType.AMPERSAND_AMPERSAND);
717 return next;
718 } else if (next == $equal) {
719 _appendTokenOfType(TokenType.AMPERSAND_EQ);
720 return _reader.advance();
721 } else {
722 _appendTokenOfType(TokenType.AMPERSAND);
723 return next;
724 }
725 }
726
727 int _tokenizeBar(int next) {
728 // ||= || |= |
729 next = _reader.advance();
730 if (next == $bar) {
731 next = _reader.advance();
732 if (scanLazyAssignmentOperators && next == $equal) {
733 _appendTokenOfType(TokenType.BAR_BAR_EQ);
734 return _reader.advance();
735 }
736 _appendTokenOfType(TokenType.BAR_BAR);
737 return next;
738 } else if (next == $equal) {
739 _appendTokenOfType(TokenType.BAR_EQ);
740 return _reader.advance();
741 } else {
742 _appendTokenOfType(TokenType.BAR);
743 return next;
744 }
745 }
746
747 int _tokenizeCaret(int next) =>
748 _select($equal, TokenType.CARET_EQ, TokenType.CARET);
749
750 int _tokenizeDotOrNumber(int next) {
751 int start = _reader.offset;
752 next = _reader.advance();
753 if ($0 <= next && next <= $9) {
754 return _tokenizeFractionPart(next, start);
755 } else if ($dot == next) {
756 return _select(
757 $dot, TokenType.PERIOD_PERIOD_PERIOD, TokenType.PERIOD_PERIOD);
758 } else {
759 _appendTokenOfType(TokenType.PERIOD);
760 return next;
761 }
762 }
763
764 int _tokenizeEquals(int next) {
765 // = == =>
766 next = _reader.advance();
767 if (next == $equal) {
768 _appendTokenOfType(TokenType.EQ_EQ);
769 return _reader.advance();
770 } else if (next == $gt) {
771 _appendTokenOfType(TokenType.FUNCTION);
772 return _reader.advance();
773 }
774 _appendTokenOfType(TokenType.EQ);
775 return next;
776 }
777
778 int _tokenizeExclamation(int next) {
779 // ! !=
780 next = _reader.advance();
781 if (next == $equal) {
782 _appendTokenOfType(TokenType.BANG_EQ);
783 return _reader.advance();
784 }
785 _appendTokenOfType(TokenType.BANG);
786 return next;
787 }
788
789 int _tokenizeExponent(int next) {
790 if (next == $plus || next == $minus) {
791 next = _reader.advance();
792 }
793 bool hasDigits = false;
794 while (true) {
795 if ($0 <= next && next <= $9) {
796 hasDigits = true;
797 } else {
798 if (!hasDigits) {
799 _reportError(ScannerErrorCode.MISSING_DIGIT);
800 }
801 return next;
802 }
803 next = _reader.advance();
804 }
805 }
806
807 int _tokenizeFractionPart(int next, int start) {
808 bool done = false;
809 bool hasDigit = false;
810 LOOP:
811 while (!done) {
812 if ($0 <= next && next <= $9) {
813 hasDigit = true;
814 } else if ($e == next || $E == next) {
815 hasDigit = true;
816 next = _tokenizeExponent(_reader.advance());
817 done = true;
818 continue LOOP;
819 } else {
820 done = true;
821 continue LOOP;
822 }
823 next = _reader.advance();
824 }
825 if (!hasDigit) {
826 _appendStringToken(TokenType.INT, _reader.getString(start, -2));
827 if ($dot == next) {
828 return _selectWithOffset($dot, TokenType.PERIOD_PERIOD_PERIOD,
829 TokenType.PERIOD_PERIOD, _reader.offset - 1);
830 }
831 _appendTokenOfTypeWithOffset(TokenType.PERIOD, _reader.offset - 1);
832 return bigSwitch(next);
833 }
834 _appendStringToken(
835 TokenType.DOUBLE, _reader.getString(start, next < 0 ? 0 : -1));
836 return next;
837 }
838
839 int _tokenizeGreaterThan(int next) {
840 // > >= >> >>=
841 next = _reader.advance();
842 if ($equal == next) {
843 _appendTokenOfType(TokenType.GT_EQ);
844 return _reader.advance();
845 } else if ($gt == next) {
846 next = _reader.advance();
847 if ($equal == next) {
848 _appendTokenOfType(TokenType.GT_GT_EQ);
849 return _reader.advance();
850 } else {
851 _appendTokenOfType(TokenType.GT_GT);
852 return next;
853 }
854 } else {
855 _appendTokenOfType(TokenType.GT);
856 return next;
857 }
858 }
859
860 int _tokenizeHex(int next) {
861 int start = _reader.offset - 1;
862 bool hasDigits = false;
863 while (true) {
864 next = _reader.advance();
865 if (($0 <= next && next <= $9) ||
866 ($A <= next && next <= $F) ||
867 ($a <= next && next <= $f)) {
868 hasDigits = true;
869 } else {
870 if (!hasDigits) {
871 _reportError(ScannerErrorCode.MISSING_HEX_DIGIT);
872 }
873 _appendStringToken(
874 TokenType.HEXADECIMAL, _reader.getString(start, next < 0 ? 0 : -1));
875 return next;
876 }
877 }
878 }
879
880 int _tokenizeHexOrNumber(int next) {
881 int x = _reader.peek();
882 if (x == $x || x == $X) {
883 _reader.advance();
884 return _tokenizeHex(x);
885 }
886 return _tokenizeNumber(next);
887 }
888
889 int _tokenizeIdentifier(int next, int start, bool allowDollar) {
890 while (($a <= next && next <= $z) ||
891 ($A <= next && next <= $Z) ||
892 ($0 <= next && next <= $9) ||
893 next == $_ ||
894 (next == $$ && allowDollar)) {
895 next = _reader.advance();
896 }
897 _appendStringToken(
898 TokenType.IDENTIFIER, _reader.getString(start, next < 0 ? 0 : -1));
899 return next;
900 }
901
902 int _tokenizeInterpolatedExpression(int next, int start) {
903 _appendBeginToken(TokenType.STRING_INTERPOLATION_EXPRESSION);
904 next = _reader.advance();
905 while (next != -1) {
906 if (next == $rbrace) {
907 BeginToken begin =
908 _findTokenMatchingClosingBraceInInterpolationExpression();
909 if (begin == null) {
910 _beginToken();
911 _appendTokenOfType(TokenType.CLOSE_CURLY_BRACKET);
912 next = _reader.advance();
913 _beginToken();
914 return next;
915 } else if (begin.type == TokenType.OPEN_CURLY_BRACKET) {
916 _beginToken();
917 _appendEndToken(
918 TokenType.CLOSE_CURLY_BRACKET, TokenType.OPEN_CURLY_BRACKET);
919 next = _reader.advance();
920 _beginToken();
921 } else if (begin.type == TokenType.STRING_INTERPOLATION_EXPRESSION) {
922 _beginToken();
923 _appendEndToken(TokenType.CLOSE_CURLY_BRACKET,
924 TokenType.STRING_INTERPOLATION_EXPRESSION);
925 next = _reader.advance();
926 _beginToken();
927 return next;
928 }
929 } else {
930 next = bigSwitch(next);
931 }
932 }
933 return next;
934 }
935
936 int _tokenizeInterpolatedIdentifier(int next, int start) {
937 _appendStringTokenWithOffset(
938 TokenType.STRING_INTERPOLATION_IDENTIFIER, "\$", 0);
939 if (($A <= next && next <= $Z) ||
940 ($a <= next && next <= $z) ||
941 next == $_) {
942 _beginToken();
943 next = _tokenizeKeywordOrIdentifier(next, false);
944 }
945 _beginToken();
946 return next;
947 }
948
949 int _tokenizeKeywordOrIdentifier(int next, bool allowDollar) {
950 KeywordState state = KeywordState.KEYWORD_STATE;
951 int start = _reader.offset;
952 while (state != null && $a <= next && next <= $z) {
953 state = state.next(next);
954 next = _reader.advance();
955 }
956 if (state == null || state.keyword() == null) {
957 return _tokenizeIdentifier(next, start, allowDollar);
958 }
959 if (($A <= next && next <= $Z) ||
960 ($0 <= next && next <= $9) ||
961 next == $_ ||
962 next == $$) {
963 return _tokenizeIdentifier(next, start, allowDollar);
964 } else if (next < 128) {
965 _appendKeywordToken(state.keyword());
966 return next;
967 } else {
968 return _tokenizeIdentifier(next, start, allowDollar);
969 }
970 }
971
972 int _tokenizeLessThan(int next) {
973 // < <= << <<=
974 next = _reader.advance();
975 if ($equal == next) {
976 _appendTokenOfType(TokenType.LT_EQ);
977 return _reader.advance();
978 } else if ($lt == next) {
979 return _select($equal, TokenType.LT_LT_EQ, TokenType.LT_LT);
980 } else {
981 _appendTokenOfType(TokenType.LT);
982 return next;
983 }
984 }
985
986 int _tokenizeMinus(int next) {
987 // - -- -=
988 next = _reader.advance();
989 if (next == $minus) {
990 _appendTokenOfType(TokenType.MINUS_MINUS);
991 return _reader.advance();
992 } else if (next == $equal) {
993 _appendTokenOfType(TokenType.MINUS_EQ);
994 return _reader.advance();
995 } else {
996 _appendTokenOfType(TokenType.MINUS);
997 return next;
998 }
999 }
1000
1001 int _tokenizeMultiLineComment(int next) {
1002 int nesting = 1;
1003 next = _reader.advance();
1004 while (true) {
1005 if (-1 == next) {
1006 _reportError(ScannerErrorCode.UNTERMINATED_MULTI_LINE_COMMENT);
1007 _appendCommentToken(
1008 TokenType.MULTI_LINE_COMMENT, _reader.getString(_tokenStart, 0));
1009 return next;
1010 } else if ($asterisk == next) {
1011 next = _reader.advance();
1012 if ($slash == next) {
1013 --nesting;
1014 if (0 == nesting) {
1015 _appendCommentToken(TokenType.MULTI_LINE_COMMENT,
1016 _reader.getString(_tokenStart, 0));
1017 return _reader.advance();
1018 } else {
1019 next = _reader.advance();
1020 }
1021 }
1022 } else if ($slash == next) {
1023 next = _reader.advance();
1024 if ($asterisk == next) {
1025 next = _reader.advance();
1026 ++nesting;
1027 }
1028 } else if (next == $cr) {
1029 next = _reader.advance();
1030 if (next == $lf) {
1031 next = _reader.advance();
1032 }
1033 recordStartOfLine();
1034 } else if (next == $lf) {
1035 next = _reader.advance();
1036 recordStartOfLine();
1037 } else {
1038 next = _reader.advance();
1039 }
1040 }
1041 }
1042
1043 int _tokenizeMultiLineRawString(int quoteChar, int start) {
1044 int next = _reader.advance();
1045 outer:
1046 while (next != -1) {
1047 while (next != quoteChar) {
1048 if (next == -1) {
1049 break outer;
1050 } else if (next == $cr) {
1051 next = _reader.advance();
1052 if (next == $lf) {
1053 next = _reader.advance();
1054 }
1055 recordStartOfLine();
1056 } else if (next == $lf) {
1057 next = _reader.advance();
1058 recordStartOfLine();
1059 } else {
1060 next = _reader.advance();
1061 }
1062 }
1063 next = _reader.advance();
1064 if (next == quoteChar) {
1065 next = _reader.advance();
1066 if (next == quoteChar) {
1067 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1068 return _reader.advance();
1069 }
1070 }
1071 }
1072 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1073 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1074 return _reader.advance();
1075 }
1076
1077 int _tokenizeMultiLineString(int quoteChar, int start, bool raw) {
1078 if (raw) {
1079 return _tokenizeMultiLineRawString(quoteChar, start);
1080 }
1081 int next = _reader.advance();
1082 while (next != -1) {
1083 if (next == $$) {
1084 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1085 next = _tokenizeStringInterpolation(start);
1086 _beginToken();
1087 start = _reader.offset;
1088 continue;
1089 }
1090 if (next == quoteChar) {
1091 next = _reader.advance();
1092 if (next == quoteChar) {
1093 next = _reader.advance();
1094 if (next == quoteChar) {
1095 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1096 return _reader.advance();
1097 }
1098 }
1099 continue;
1100 }
1101 if (next == $backslash) {
1102 next = _reader.advance();
1103 if (next == -1) {
1104 break;
1105 }
1106 if (next == $cr) {
1107 next = _reader.advance();
1108 if (next == $lf) {
1109 next = _reader.advance();
1110 }
1111 recordStartOfLine();
1112 } else if (next == $lf) {
1113 recordStartOfLine();
1114 next = _reader.advance();
1115 } else {
1116 next = _reader.advance();
1117 }
1118 } else if (next == $cr) {
1119 next = _reader.advance();
1120 if (next == $lf) {
1121 next = _reader.advance();
1122 }
1123 recordStartOfLine();
1124 } else if (next == $lf) {
1125 recordStartOfLine();
1126 next = _reader.advance();
1127 } else {
1128 next = _reader.advance();
1129 }
1130 }
1131 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1132 if (start == _reader.offset) {
1133 _appendStringTokenWithOffset(TokenType.STRING, "", 1);
1134 } else {
1135 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1136 }
1137 return _reader.advance();
1138 }
1139
1140 int _tokenizeMultiply(int next) =>
1141 _select($equal, TokenType.STAR_EQ, TokenType.STAR);
1142
1143 int _tokenizeNumber(int next) {
1144 int start = _reader.offset;
1145 while (true) {
1146 next = _reader.advance();
1147 if ($0 <= next && next <= $9) {
1148 continue;
1149 } else if (next == $dot) {
1150 return _tokenizeFractionPart(_reader.advance(), start);
1151 } else if (next == $e || next == $E) {
1152 return _tokenizeFractionPart(next, start);
1153 } else {
1154 _appendStringToken(
1155 TokenType.INT, _reader.getString(start, next < 0 ? 0 : -1));
1156 return next;
1157 }
1158 }
1159 }
1160
1161 int _tokenizeOpenSquareBracket(int next) {
1162 // [ [] []=
1163 next = _reader.advance();
1164 if (next == $close_bracket) {
1165 return _select($equal, TokenType.INDEX_EQ, TokenType.INDEX);
1166 } else {
1167 _appendBeginToken(TokenType.OPEN_SQUARE_BRACKET);
1168 return next;
1169 }
1170 }
1171
1172 int _tokenizePercent(int next) =>
1173 _select($equal, TokenType.PERCENT_EQ, TokenType.PERCENT);
1174
1175 int _tokenizePlus(int next) {
1176 // + ++ +=
1177 next = _reader.advance();
1178 if ($plus == next) {
1179 _appendTokenOfType(TokenType.PLUS_PLUS);
1180 return _reader.advance();
1181 } else if ($equal == next) {
1182 _appendTokenOfType(TokenType.PLUS_EQ);
1183 return _reader.advance();
1184 } else {
1185 _appendTokenOfType(TokenType.PLUS);
1186 return next;
1187 }
1188 }
1189
1190 int _tokenizeQuestion() {
1191 // ? ?. ?? ??=
1192 int next = _reader.advance();
1193 if (next == $dot) {
1194 // '.'
1195 _appendTokenOfType(TokenType.QUESTION_PERIOD);
1196 return _reader.advance();
1197 } else if (next == $question) {
1198 // '?'
1199 next = _reader.advance();
1200 if (next == $equal) {
1201 // '='
1202 _appendTokenOfType(TokenType.QUESTION_QUESTION_EQ);
1203 return _reader.advance();
1204 } else {
1205 _appendTokenOfType(TokenType.QUESTION_QUESTION);
1206 return next;
1207 }
1208 } else {
1209 _appendTokenOfType(TokenType.QUESTION);
1210 return next;
1211 }
1212 }
1213
1214 int _tokenizeSingleLineComment(int next) {
1215 while (true) {
1216 next = _reader.advance();
1217 if (-1 == next) {
1218 _appendCommentToken(
1219 TokenType.SINGLE_LINE_COMMENT, _reader.getString(_tokenStart, 0));
1220 return next;
1221 } else if ($lf == next || $cr == next) {
1222 _appendCommentToken(
1223 TokenType.SINGLE_LINE_COMMENT, _reader.getString(_tokenStart, -1));
1224 return next;
1225 }
1226 }
1227 }
1228
1229 int _tokenizeSingleLineRawString(int next, int quoteChar, int start) {
1230 next = _reader.advance();
1231 while (next != -1) {
1232 if (next == quoteChar) {
1233 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1234 return _reader.advance();
1235 } else if (next == $cr || next == $lf) {
1236 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1237 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1238 return _reader.advance();
1239 }
1240 next = _reader.advance();
1241 }
1242 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1243 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1244 return _reader.advance();
1245 }
1246
1247 int _tokenizeSingleLineString(int next, int quoteChar, int start) {
1248 while (next != quoteChar) {
1249 if (next == $backslash) {
1250 next = _reader.advance();
1251 } else if (next == $$) {
1252 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1253 next = _tokenizeStringInterpolation(start);
1254 _beginToken();
1255 start = _reader.offset;
1256 continue;
1257 }
1258 if (next <= $cr && (next == $lf || next == $cr || next == -1)) {
1259 _reportError(ScannerErrorCode.UNTERMINATED_STRING_LITERAL);
1260 if (start == _reader.offset) {
1261 _appendStringTokenWithOffset(TokenType.STRING, "", 1);
1262 } else if (next == -1) {
1263 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1264 } else {
1265 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1266 }
1267 return _reader.advance();
1268 }
1269 next = _reader.advance();
1270 }
1271 _appendStringToken(TokenType.STRING, _reader.getString(start, 0));
1272 return _reader.advance();
1273 }
1274
1275 int _tokenizeSlashOrComment(int next) {
1276 next = _reader.advance();
1277 if ($asterisk == next) {
1278 return _tokenizeMultiLineComment(next);
1279 } else if ($slash == next) {
1280 return _tokenizeSingleLineComment(next);
1281 } else if ($equal == next) {
1282 _appendTokenOfType(TokenType.SLASH_EQ);
1283 return _reader.advance();
1284 } else {
1285 _appendTokenOfType(TokenType.SLASH);
1286 return next;
1287 }
1288 }
1289
1290 int _tokenizeString(int next, int start, bool raw) {
1291 int quoteChar = next;
1292 next = _reader.advance();
1293 if (quoteChar == next) {
1294 next = _reader.advance();
1295 if (quoteChar == next) {
1296 // Multiline string.
1297 return _tokenizeMultiLineString(quoteChar, start, raw);
1298 } else {
1299 // Empty string.
1300 _appendStringToken(TokenType.STRING, _reader.getString(start, -1));
1301 return next;
1302 }
1303 }
1304 if (raw) {
1305 return _tokenizeSingleLineRawString(next, quoteChar, start);
1306 } else {
1307 return _tokenizeSingleLineString(next, quoteChar, start);
1308 }
1309 }
1310
1311 int _tokenizeStringInterpolation(int start) {
1312 _beginToken();
1313 int next = _reader.advance();
1314 if (next == $lbrace) {
1315 return _tokenizeInterpolatedExpression(next, start);
1316 } else {
1317 return _tokenizeInterpolatedIdentifier(next, start);
1318 }
1319 }
1320
1321 int _tokenizeTag(int next) {
1322 // # or #!.*[\n\r]
1323 if (_reader.offset == 0) {
1324 if (_reader.peek() == $exclamation) {
1325 do {
1326 next = _reader.advance();
1327 } while (next != $lf && next != $cr && next > 0);
1328 _appendStringToken(
1329 TokenType.SCRIPT_TAG, _reader.getString(_tokenStart, 0));
1330 return next;
1331 }
1332 }
1333 _appendTokenOfType(TokenType.HASH);
1334 return _reader.advance();
1335 }
1336
1337 int _tokenizeTilde(int next) {
1338 // ~ ~/ ~/=
1339 next = _reader.advance();
1340 if (next == $slash) {
1341 return _select($equal, TokenType.TILDE_SLASH_EQ, TokenType.TILDE_SLASH);
1342 } else {
1343 _appendTokenOfType(TokenType.TILDE);
1344 return next;
1345 }
1346 }
1347
1348 /**
1349 * Checks if [value] is a single-line or multi-line comment.
1350 */
1351 static bool _isDocumentationComment(String value) {
1352 return StringUtilities.startsWith3(value, 0, $slash, $slash, $slash) ||
1353 StringUtilities.startsWith3(value, 0, $slash, $asterisk, $asterisk);
1354 } 53 }
1355 } 54 }
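
For orientation, a minimal sketch of driving the refactored Scanner end to end. The constructor shape and tokenize() come from the diff above; the use of CharSequenceReader, AnalysisErrorListener.NULL_LISTENER, and a null Source follows common analyzer test style of the time and is an assumption here, not part of the patch. The '<<' input echoes the class comment: with no context the scanner takes the longest match, so it is scanned as a single LT_LT token rather than two angle brackets.

    import 'package:analyzer/dart/ast/token.dart' show TokenType;
    import 'package:analyzer/error/listener.dart' show AnalysisErrorListener;
    import 'package:analyzer/src/dart/scanner/reader.dart' show CharSequenceReader;
    import 'package:analyzer/src/dart/scanner/scanner.dart' show Scanner;

    void main() {
      var scanner = new Scanner(null, new CharSequenceReader('a << b'),
          AnalysisErrorListener.NULL_LISTENER);
      for (var token = scanner.tokenize();
          token.type != TokenType.EOF;
          token = token.next) {
        print(token.lexeme); // prints: a, <<, b  (one token per line)
      }
    }
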