| OLD | NEW |
| 1 // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 library fasta.analyzer.token_utils; | 5 library fasta.analyzer.token_utils; |
| 6 | 6 |
| 7 import 'package:front_end/src/fasta/parser/error_kind.dart' show ErrorKind; | |
| 8 | |
| 9 import 'package:front_end/src/fasta/scanner/error_token.dart' show ErrorToken; | 7 import 'package:front_end/src/fasta/scanner/error_token.dart' show ErrorToken; |
| 10 | 8 |
| 11 import 'package:front_end/src/fasta/scanner/keyword.dart' show Keyword; | 9 import 'package:front_end/src/fasta/scanner/keyword.dart' show Keyword; |
| 12 | 10 |
| 13 import 'package:front_end/src/fasta/scanner/precedence.dart'; | 11 import 'package:front_end/src/fasta/scanner/precedence.dart'; |
| 14 | 12 |
| 15 import 'package:front_end/src/fasta/scanner/token.dart' | 13 import 'package:front_end/src/fasta/scanner/token.dart' |
| 16 show | 14 show |
| 17 BeginGroupToken, | 15 BeginGroupToken, |
| 18 CommentToken, | 16 CommentToken, |
| 19 DartDocToken, | 17 DartDocToken, |
| 20 KeywordToken, | 18 KeywordToken, |
| 21 StringToken, | 19 StringToken, |
| 22 SymbolToken, | 20 SymbolToken, |
| 23 Token; | 21 Token; |
| 24 | 22 |
| 25 import 'package:front_end/src/fasta/scanner/token_constants.dart'; | 23 import 'package:front_end/src/fasta/scanner/token_constants.dart'; |
| 26 | 24 |
| 25 import 'package:front_end/src/scanner/errors.dart' show translateErrorToken; |
| 26 |
| 27 import 'package:front_end/src/scanner/token.dart' as analyzer | 27 import 'package:front_end/src/scanner/token.dart' as analyzer |
| 28 show | 28 show |
| 29 BeginToken, | 29 BeginToken, |
| 30 BeginTokenWithComment, | 30 BeginTokenWithComment, |
| 31 CommentToken, | 31 CommentToken, |
| 32 Keyword, | 32 Keyword, |
| 33 KeywordToken, | 33 KeywordToken, |
| 34 KeywordTokenWithComment, | 34 KeywordTokenWithComment, |
| 35 StringToken, | 35 StringToken, |
| 36 StringTokenWithComment, | 36 StringTokenWithComment, |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 85 analyzer.Token convertTokens(Token token) { | 85 analyzer.Token convertTokens(Token token) { |
| 86 _analyzerTokenHead = new analyzer.Token(TokenType.EOF, -1); | 86 _analyzerTokenHead = new analyzer.Token(TokenType.EOF, -1); |
| 87 _analyzerTokenHead.previous = _analyzerTokenHead; | 87 _analyzerTokenHead.previous = _analyzerTokenHead; |
| 88 _analyzerTokenTail = _analyzerTokenHead; | 88 _analyzerTokenTail = _analyzerTokenHead; |
| 89 _beginTokenStack = [null]; | 89 _beginTokenStack = [null]; |
| 90 _endTokenStack = <Token>[null]; | 90 _endTokenStack = <Token>[null]; |
| 91 | 91 |
| 92 while (true) { | 92 while (true) { |
| 93 if (token.info.kind == BAD_INPUT_TOKEN) { | 93 if (token.info.kind == BAD_INPUT_TOKEN) { |
| 94 ErrorToken errorToken = token; | 94 ErrorToken errorToken = token; |
| 95 _translateErrorToken(errorToken); | 95 translateErrorToken(errorToken, reportError); |
| 96 } else { | 96 } else { |
| 97 var translatedToken = translateToken( | 97 var translatedToken = translateToken( |
| 98 token, translateCommentTokens(token.precedingCommentTokens)); | 98 token, translateCommentTokens(token.precedingCommentTokens)); |
| 99 _matchGroups(token, translatedToken); | 99 _matchGroups(token, translatedToken); |
| 100 translatedToken.setNext(translatedToken); | 100 translatedToken.setNext(translatedToken); |
| 101 _analyzerTokenTail.setNext(translatedToken); | 101 _analyzerTokenTail.setNext(translatedToken); |
| 102 translatedToken.previous = _analyzerTokenTail; | 102 translatedToken.previous = _analyzerTokenTail; |
| 103 _analyzerTokenTail = translatedToken; | 103 _analyzerTokenTail = translatedToken; |
| 104 } | 104 } |
| 105 if (token.isEof) { | 105 if (token.isEof) { |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 156 } | 156 } |
| 157 // Synthetic end tokens use the same offset as the begin token. | 157 // Synthetic end tokens use the same offset as the begin token. |
| 158 if (translatedToken is analyzer.BeginToken && | 158 if (translatedToken is analyzer.BeginToken && |
| 159 token is BeginGroupToken && | 159 token is BeginGroupToken && |
| 160 token.endGroup != null && | 160 token.endGroup != null && |
| 161 token.endGroup.charOffset != token.charOffset) { | 161 token.endGroup.charOffset != token.charOffset) { |
| 162 _beginTokenStack.add(translatedToken); | 162 _beginTokenStack.add(translatedToken); |
| 163 _endTokenStack.add(token.endGroup); | 163 _endTokenStack.add(token.endGroup); |
| 164 } | 164 } |
| 165 } | 165 } |
| 166 | |
| 167 /// Translates the given error [token] into an analyzer error and reports it | |
| 168 /// using [reportError]. | |
| 169 void _translateErrorToken(ErrorToken token) { | |
| 170 int charOffset = token.charOffset; | |
| 171 // TODO(paulberry,ahe): why is endOffset sometimes null? | |
| 172 int endOffset = token.endOffset ?? charOffset; | |
| 173 void _makeError( | |
| 174 analyzer.ScannerErrorCode errorCode, List<Object> arguments) { | |
| 175 if (_isAtEnd(token, charOffset)) { | |
| 176 // Analyzer never generates an error message past the end of the input, | |
| 177 // since such an error would not be visible in an editor. | |
| 178 // TODO(paulberry,ahe): would it make sense to replicate this behavior | |
| 179 // in fasta, or move it elsewhere in analyzer? | |
| 180 charOffset--; | |
| 181 } | |
| 182 reportError(errorCode, charOffset, arguments); | |
| 183 } | |
| 184 | |
| 185 var errorCode = token.errorCode; | |
| 186 switch (errorCode) { | |
| 187 case ErrorKind.UnterminatedString: | |
| 188 // TODO(paulberry,ahe): Fasta reports the error location as the entire | |
| 189 // string; analyzer expects the end of the string. | |
| 190 charOffset = endOffset; | |
| 191 return _makeError( | |
| 192 analyzer.ScannerErrorCode.UNTERMINATED_STRING_LITERAL, null); | |
| 193 case ErrorKind.UnmatchedToken: | |
| 194 return null; | |
| 195 case ErrorKind.UnterminatedComment: | |
| 196 // TODO(paulberry,ahe): Fasta reports the error location as the entire | |
| 197 // comment; analyzer expects the end of the comment. | |
| 198 charOffset = endOffset; | |
| 199 return _makeError( | |
| 200 analyzer.ScannerErrorCode.UNTERMINATED_MULTI_LINE_COMMENT, null); | |
| 201 case ErrorKind.MissingExponent: | |
| 202 // TODO(paulberry,ahe): Fasta reports the error location as the entire | |
| 203 // number; analyzer expects the end of the number. | |
| 204 charOffset = endOffset; | |
| 205 return _makeError(analyzer.ScannerErrorCode.MISSING_DIGIT, null); | |
| 206 case ErrorKind.ExpectedHexDigit: | |
| 207 // TODO(paulberry,ahe): Fasta reports the error location as the entire | |
| 208 // number; analyzer expects the end of the number. | |
| 209 charOffset = endOffset; | |
| 210 return _makeError(analyzer.ScannerErrorCode.MISSING_HEX_DIGIT, null); | |
| 211 case ErrorKind.NonAsciiIdentifier: | |
| 212 case ErrorKind.NonAsciiWhitespace: | |
| 213 return _makeError( | |
| 214 analyzer.ScannerErrorCode.ILLEGAL_CHARACTER, [token.character]); | |
| 215 case ErrorKind.UnexpectedDollarInString: | |
| 216 return null; | |
| 217 default: | |
| 218 throw new UnimplementedError('$errorCode'); | |
| 219 } | |
| 220 } | |
| 221 } | 166 } |
| 222 | 167 |
| 223 /// Converts a single Fasta comment token to an analyzer comment token. | 168 /// Converts a single Fasta comment token to an analyzer comment token. |
| 224 analyzer.CommentToken toAnalyzerCommentToken(Token token) { | 169 analyzer.CommentToken toAnalyzerCommentToken(Token token) { |
| 225 // TODO(paulberry,ahe): It would be nice if the scanner gave us an | 170 // TODO(paulberry,ahe): It would be nice if the scanner gave us an |
| 226 // easier way to distinguish between the two types of comment. | 171 // easier way to distinguish between the two types of comment. |
| 227 var type = token.lexeme.startsWith('/*') | 172 var type = token.lexeme.startsWith('/*') |
| 228 ? TokenType.MULTI_LINE_COMMENT | 173 ? TokenType.MULTI_LINE_COMMENT |
| 229 : TokenType.SINGLE_LINE_COMMENT; | 174 : TokenType.SINGLE_LINE_COMMENT; |
| 230 return new analyzer.CommentToken(type, token.lexeme, token.charOffset); | 175 return new analyzer.CommentToken(type, token.lexeme, token.charOffset); |
| 231 } | 176 } |
| 232 | 177 |
| 233 /// Converts a stream of Analyzer tokens (starting with [token] and continuing | 178 /// Converts a stream of Analyzer tokens (starting with [token] and continuing |
| 234 /// to EOF) to a stream of Fasta tokens. | 179 /// to EOF) to a stream of Fasta tokens. |
| 235 /// | 180 /// |
| 236 /// TODO(paulberry): Analyzer tokens do not record error conditions, so a round | 181 /// TODO(paulberry): Analyzer tokens do not record error conditions, so a round |
| 237 /// trip through this function and [toAnalyzerTokenStream] will lose error | 182 /// trip through this function and [toAnalyzerTokenStream] will lose error |
| 238 /// information. | 183 /// information. |
| 239 Token fromAnalyzerTokenStream(analyzer.Token analyzerToken) { | 184 Token fromAnalyzerTokenStream(analyzer.Token analyzerToken) { |
| 240 Token tokenHead = new SymbolToken(EOF_INFO, -1); | 185 Token tokenHead = new SymbolToken.eof(-1); |
| 241 tokenHead.previous = tokenHead; | |
| 242 Token tokenTail = tokenHead; | 186 Token tokenTail = tokenHead; |
| 243 | 187 |
| 244 // Both fasta and analyzer have links from a "BeginToken" to its matching | 188 // Both fasta and analyzer have links from a "BeginToken" to its matching |
| 245 // "EndToken" in a group (like parentheses and braces). However, only fasta | 189 // "EndToken" in a group (like parentheses and braces). However, only fasta |
| 246 // makes these links for angle brackets. We use these stacks to map the | 190 // makes these links for angle brackets. We use these stacks to map the |
| 247 // links from the analyzer token stream into equivalent links in the fasta | 191 // links from the analyzer token stream into equivalent links in the fasta |
| 248 // token stream, and to create the links that fasta expects for angle | 192 // token stream, and to create the links that fasta expects for angle |
| 249 // brackets. | 193 // brackets. |
| 250 | 194 |
| 251 // Note: beginTokenStack and endTokenStack are seeded with a sentinel value | 195 // Note: beginTokenStack and endTokenStack are seeded with a sentinel value |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 306 tokenTail.next = token; | 250 tokenTail.next = token; |
| 307 tokenTail.next.previousToken = tokenTail; | 251 tokenTail.next.previousToken = tokenTail; |
| 308 tokenTail = token; | 252 tokenTail = token; |
| 309 matchGroups(analyzerToken, token); | 253 matchGroups(analyzerToken, token); |
| 310 return analyzerToken.next; | 254 return analyzerToken.next; |
| 311 } | 255 } |
| 312 | 256 |
| 313 while (true) { | 257 while (true) { |
| 314 // TODO(paulberry): join up begingroup/endgroup. | 258 // TODO(paulberry): join up begingroup/endgroup. |
| 315 if (analyzerToken.type == TokenType.EOF) { | 259 if (analyzerToken.type == TokenType.EOF) { |
| 316 tokenTail.next = new SymbolToken(EOF_INFO, analyzerToken.offset); | 260 tokenTail.next = new SymbolToken.eof(analyzerToken.offset); |
| 317 tokenTail.next.previousToken = tokenTail; | 261 tokenTail.next.previousToken = tokenTail; |
| 318 tokenTail.next.precedingCommentTokens = | 262 tokenTail.next.precedingCommentTokens = |
| 319 translateComments(analyzerToken.precedingComments); | 263 translateComments(analyzerToken.precedingComments); |
| 320 tokenTail.next.next = tokenTail.next; | 264 tokenTail.next.next = tokenTail.next; |
| 321 return tokenHead.next; | 265 return tokenHead.next; |
| 322 } | 266 } |
| 323 analyzerToken = translateAndAppend(analyzerToken); | 267 analyzerToken = translateAndAppend(analyzerToken); |
| 324 } | 268 } |
| 325 } | 269 } |
| 326 | 270 |
| (...skipping 172 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 499 return symbol(BACKSLASH_INFO); | 443 return symbol(BACKSLASH_INFO); |
| 500 case TokenType.PERIOD_PERIOD_PERIOD: | 444 case TokenType.PERIOD_PERIOD_PERIOD: |
| 501 return symbol(PERIOD_PERIOD_PERIOD_INFO); | 445 return symbol(PERIOD_PERIOD_PERIOD_INFO); |
| 502 // case TokenType.GENERIC_METHOD_TYPE_ASSIGN | 446 // case TokenType.GENERIC_METHOD_TYPE_ASSIGN |
| 503 // case TokenType.GENERIC_METHOD_TYPE_LIST | 447 // case TokenType.GENERIC_METHOD_TYPE_LIST |
| 504 default: | 448 default: |
| 505 return internalError('Unhandled token type ${token.type}'); | 449 return internalError('Unhandled token type ${token.type}'); |
| 506 } | 450 } |
| 507 } | 451 } |
| 508 | 452 |
| 509 /// Determines whether the given [charOffset], which came from the non-EOF token | |
| 510 /// [token], represents the end of the input. | |
| 511 bool _isAtEnd(Token token, int charOffset) { | |
| 512 while (true) { | |
| 513 // Skip to the next token. | |
| 514 token = token.next; | |
| 515 // If we've found an EOF token, its charOffset indicates where the end of | |
| 516 // the input is. | |
| 517 if (token.isEof) return token.charOffset == charOffset; | |
| 518 // If we've found a non-error token, then we know there is additional input | |
| 519 // text after [charOffset]. | |
| 520 if (token.info.kind != BAD_INPUT_TOKEN) return false; | |
| 521 // Otherwise keep looking. | |
| 522 } | |
| 523 } | |
| 524 | |
| 525 analyzer.Token toAnalyzerToken(Token token, | 453 analyzer.Token toAnalyzerToken(Token token, |
| 526 [analyzer.CommentToken commentToken]) { | 454 [analyzer.CommentToken commentToken]) { |
| 527 if (token == null) return null; | 455 if (token == null) return null; |
| 528 analyzer.Token makeStringToken(TokenType tokenType) { | 456 analyzer.Token makeStringToken(TokenType tokenType) { |
| 529 if (commentToken == null) { | 457 if (commentToken == null) { |
| 530 return new analyzer.StringToken( | 458 return new analyzer.StringToken( |
| 531 tokenType, token.lexeme, token.charOffset); | 459 tokenType, token.lexeme, token.charOffset); |
| 532 } else { | 460 } else { |
| 533 return new analyzer.StringTokenWithComment( | 461 return new analyzer.StringTokenWithComment( |
| 534 tokenType, token.lexeme, token.charOffset, commentToken); | 462 tokenType, token.lexeme, token.charOffset, commentToken); |
| (...skipping 269 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 804 case PERIOD_PERIOD_PERIOD_TOKEN: | 732 case PERIOD_PERIOD_PERIOD_TOKEN: |
| 805 return TokenType.PERIOD_PERIOD_PERIOD; | 733 return TokenType.PERIOD_PERIOD_PERIOD; |
| 806 // case GENERIC_METHOD_TYPE_LIST_TOKEN: | 734 // case GENERIC_METHOD_TYPE_LIST_TOKEN: |
| 807 // return TokenType.GENERIC_METHOD_TYPE_LIST; | 735 // return TokenType.GENERIC_METHOD_TYPE_LIST; |
| 808 // case GENERIC_METHOD_TYPE_ASSIGN_TOKEN: | 736 // case GENERIC_METHOD_TYPE_ASSIGN_TOKEN: |
| 809 // return TokenType.GENERIC_METHOD_TYPE_ASSIGN; | 737 // return TokenType.GENERIC_METHOD_TYPE_ASSIGN; |
| 810 default: | 738 default: |
| 811 return internalError("Unhandled token ${token.info}"); | 739 return internalError("Unhandled token ${token.info}"); |
| 812 } | 740 } |
| 813 } | 741 } |
| OLD | NEW |