OLD | NEW |
1 // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 library fasta.analyzer.token_utils; | 5 library fasta.analyzer.token_utils; |
6 | 6 |
7 import 'package:front_end/src/fasta/parser/error_kind.dart' show ErrorKind; | |
8 | |
9 import 'package:front_end/src/fasta/scanner/error_token.dart' show ErrorToken; | 7 import 'package:front_end/src/fasta/scanner/error_token.dart' show ErrorToken; |
10 | 8 |
11 import 'package:front_end/src/fasta/scanner/keyword.dart' show Keyword; | 9 import 'package:front_end/src/fasta/scanner/keyword.dart' show Keyword; |
12 | 10 |
13 import 'package:front_end/src/fasta/scanner/precedence.dart'; | 11 import 'package:front_end/src/fasta/scanner/precedence.dart'; |
14 | 12 |
15 import 'package:front_end/src/fasta/scanner/token.dart' | 13 import 'package:front_end/src/fasta/scanner/token.dart' |
16 show | 14 show |
17 BeginGroupToken, | 15 BeginGroupToken, |
18 CommentToken, | 16 CommentToken, |
19 DartDocToken, | 17 DartDocToken, |
20 KeywordToken, | 18 KeywordToken, |
21 StringToken, | 19 StringToken, |
22 SymbolToken, | 20 SymbolToken, |
23 Token; | 21 Token; |
24 | 22 |
25 import 'package:front_end/src/fasta/scanner/token_constants.dart'; | 23 import 'package:front_end/src/fasta/scanner/token_constants.dart'; |
26 | 24 |
| 25 import 'package:front_end/src/scanner/errors.dart' show translateErrorToken; |
| 26 |
27 import 'package:front_end/src/scanner/token.dart' as analyzer | 27 import 'package:front_end/src/scanner/token.dart' as analyzer |
28 show | 28 show |
29 BeginToken, | 29 BeginToken, |
30 BeginTokenWithComment, | 30 BeginTokenWithComment, |
31 CommentToken, | 31 CommentToken, |
32 Keyword, | 32 Keyword, |
33 KeywordToken, | 33 KeywordToken, |
34 KeywordTokenWithComment, | 34 KeywordTokenWithComment, |
35 StringToken, | 35 StringToken, |
36 StringTokenWithComment, | 36 StringTokenWithComment, |
(...skipping 48 matching lines...)
85 analyzer.Token convertTokens(Token token) { | 85 analyzer.Token convertTokens(Token token) { |
86 _analyzerTokenHead = new analyzer.Token(TokenType.EOF, -1); | 86 _analyzerTokenHead = new analyzer.Token(TokenType.EOF, -1); |
87 _analyzerTokenHead.previous = _analyzerTokenHead; | 87 _analyzerTokenHead.previous = _analyzerTokenHead; |
88 _analyzerTokenTail = _analyzerTokenHead; | 88 _analyzerTokenTail = _analyzerTokenHead; |
89 _beginTokenStack = [null]; | 89 _beginTokenStack = [null]; |
90 _endTokenStack = <Token>[null]; | 90 _endTokenStack = <Token>[null]; |
91 | 91 |
92 while (true) { | 92 while (true) { |
93 if (token.info.kind == BAD_INPUT_TOKEN) { | 93 if (token.info.kind == BAD_INPUT_TOKEN) { |
94 ErrorToken errorToken = token; | 94 ErrorToken errorToken = token; |
95 _translateErrorToken(errorToken); | 95 translateErrorToken(errorToken, reportError); |
96 } else { | 96 } else { |
97 var translatedToken = translateToken( | 97 var translatedToken = translateToken( |
98 token, translateCommentTokens(token.precedingCommentTokens)); | 98 token, translateCommentTokens(token.precedingCommentTokens)); |
99 _matchGroups(token, translatedToken); | 99 _matchGroups(token, translatedToken); |
100 translatedToken.setNext(translatedToken); | 100 translatedToken.setNext(translatedToken); |
101 _analyzerTokenTail.setNext(translatedToken); | 101 _analyzerTokenTail.setNext(translatedToken); |
102 translatedToken.previous = _analyzerTokenTail; | 102 translatedToken.previous = _analyzerTokenTail; |
103 _analyzerTokenTail = translatedToken; | 103 _analyzerTokenTail = translatedToken; |
104 } | 104 } |
105 if (token.isEof) { | 105 if (token.isEof) { |
(...skipping 50 matching lines...)
156 } | 156 } |
157 // Synthetic end tokens use the same offset as the begin token. | 157 // Synthetic end tokens use the same offset as the begin token. |
158 if (translatedToken is analyzer.BeginToken && | 158 if (translatedToken is analyzer.BeginToken && |
159 token is BeginGroupToken && | 159 token is BeginGroupToken && |
160 token.endGroup != null && | 160 token.endGroup != null && |
161 token.endGroup.charOffset != token.charOffset) { | 161 token.endGroup.charOffset != token.charOffset) { |
162 _beginTokenStack.add(translatedToken); | 162 _beginTokenStack.add(translatedToken); |
163 _endTokenStack.add(token.endGroup); | 163 _endTokenStack.add(token.endGroup); |
164 } | 164 } |
165 } | 165 } |
166 | |
167 /// Translates the given error [token] into an analyzer error and reports it | |
168 /// using [reportError]. | |
169 void _translateErrorToken(ErrorToken token) { | |
170 int charOffset = token.charOffset; | |
171 // TODO(paulberry,ahe): why is endOffset sometimes null? | |
172 int endOffset = token.endOffset ?? charOffset; | |
173 void _makeError( | |
174 analyzer.ScannerErrorCode errorCode, List<Object> arguments) { | |
175 if (_isAtEnd(token, charOffset)) { | |
176 // Analyzer never generates an error message past the end of the input, | |
177 // since such an error would not be visible in an editor. | |
178 // TODO(paulberry,ahe): would it make sense to replicate this behavior | |
179 // in fasta, or move it elsewhere in analyzer? | |
180 charOffset--; | |
181 } | |
182 reportError(errorCode, charOffset, arguments); | |
183 } | |
184 | |
185 var errorCode = token.errorCode; | |
186 switch (errorCode) { | |
187 case ErrorKind.UnterminatedString: | |
188 // TODO(paulberry,ahe): Fasta reports the error location as the entire | |
189 // string; analyzer expects the end of the string. | |
190 charOffset = endOffset; | |
191 return _makeError( | |
192 analyzer.ScannerErrorCode.UNTERMINATED_STRING_LITERAL, null); | |
193 case ErrorKind.UnmatchedToken: | |
194 return null; | |
195 case ErrorKind.UnterminatedComment: | |
196 // TODO(paulberry,ahe): Fasta reports the error location as the entire | |
197 // comment; analyzer expects the end of the comment. | |
198 charOffset = endOffset; | |
199 return _makeError( | |
200 analyzer.ScannerErrorCode.UNTERMINATED_MULTI_LINE_COMMENT, null); | |
201 case ErrorKind.MissingExponent: | |
202 // TODO(paulberry,ahe): Fasta reports the error location as the entire | |
203 // number; analyzer expects the end of the number. | |
204 charOffset = endOffset; | |
205 return _makeError(analyzer.ScannerErrorCode.MISSING_DIGIT, null); | |
206 case ErrorKind.ExpectedHexDigit: | |
207 // TODO(paulberry,ahe): Fasta reports the error location as the entire | |
208 // number; analyzer expects the end of the number. | |
209 charOffset = endOffset; | |
210 return _makeError(analyzer.ScannerErrorCode.MISSING_HEX_DIGIT, null); | |
211 case ErrorKind.NonAsciiIdentifier: | |
212 case ErrorKind.NonAsciiWhitespace: | |
213 return _makeError( | |
214 analyzer.ScannerErrorCode.ILLEGAL_CHARACTER, [token.character]); | |
215 case ErrorKind.UnexpectedDollarInString: | |
216 return null; | |
217 default: | |
218 throw new UnimplementedError('$errorCode'); | |
219 } | |
220 } | |
221 } | 166 } |
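Review note: this change drops the private _translateErrorToken method (and its helper _isAtEnd, removed further down) in favor of the shared translateErrorToken from package:front_end/src/scanner/errors.dart, called with the converter's reportError callback. A minimal sketch of the new call path, assuming only what the diff shows: the callback has the same (errorCode, offset, arguments) shape the removed code's reportError used, and the handler body below is purely illustrative.

    import 'package:front_end/src/fasta/scanner/error_token.dart' show ErrorToken;
    import 'package:front_end/src/scanner/errors.dart' show translateErrorToken;

    void handleBadInputToken(ErrorToken errorToken) {
      // translateErrorToken maps the fasta ErrorToken to an analyzer
      // ScannerErrorCode plus offset/arguments and reports it through the
      // callback, replacing the switch that used to live in this class.
      translateErrorToken(errorToken, (errorCode, offset, arguments) {
        // Illustrative sink; the real converter forwards to its reportError.
        print('scan error $errorCode at $offset: $arguments');
      });
    }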
222 | 167 |
223 /// Converts a single Fasta comment token to an analyzer comment token. | 168 /// Converts a single Fasta comment token to an analyzer comment token. |
224 analyzer.CommentToken toAnalyzerCommentToken(Token token) { | 169 analyzer.CommentToken toAnalyzerCommentToken(Token token) { |
225 // TODO(paulberry,ahe): It would be nice if the scanner gave us an | 170 // TODO(paulberry,ahe): It would be nice if the scanner gave us an |
226 // easier way to distinguish between the two types of comment. | 171 // easier way to distinguish between the two types of comment. |
227 var type = token.lexeme.startsWith('/*') | 172 var type = token.lexeme.startsWith('/*') |
228 ? TokenType.MULTI_LINE_COMMENT | 173 ? TokenType.MULTI_LINE_COMMENT |
229 : TokenType.SINGLE_LINE_COMMENT; | 174 : TokenType.SINGLE_LINE_COMMENT; |
230 return new analyzer.CommentToken(type, token.lexeme, token.charOffset); | 175 return new analyzer.CommentToken(type, token.lexeme, token.charOffset); |
231 } | 176 } |
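The TODO above is why the conversion inspects the lexeme: the scanner does not tag comment tokens with their kind, so the prefix is the only signal. A small restatement of that heuristic for reference (commentTypeFor is an illustrative name, not part of this CL):

    TokenType commentTypeFor(String lexeme) => lexeme.startsWith('/*')
        ? TokenType.MULTI_LINE_COMMENT
        : TokenType.SINGLE_LINE_COMMENT;

    // commentTypeFor('/* block */') -> TokenType.MULTI_LINE_COMMENT
    // commentTypeFor('// line')     -> TokenType.SINGLE_LINE_COMMENT
    // commentTypeFor('/// doc')     -> TokenType.SINGLE_LINE_COMMENT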
232 | 177 |
233 /// Converts a stream of Analyzer tokens (starting with [token] and continuing | 178 /// Converts a stream of Analyzer tokens (starting with [token] and continuing |
234 /// to EOF) to a stream of Fasta tokens. | 179 /// to EOF) to a stream of Fasta tokens. |
235 /// | 180 /// |
236 /// TODO(paulberry): Analyzer tokens do not record error conditions, so a round | 181 /// TODO(paulberry): Analyzer tokens do not record error conditions, so a round |
237 /// trip through this function and [toAnalyzerTokenStream] will lose error | 182 /// trip through this function and [toAnalyzerTokenStream] will lose error |
238 /// information. | 183 /// information. |
239 Token fromAnalyzerTokenStream(analyzer.Token analyzerToken) { | 184 Token fromAnalyzerTokenStream(analyzer.Token analyzerToken) { |
240 Token tokenHead = new SymbolToken(EOF_INFO, -1); | 185 Token tokenHead = new SymbolToken.eof(-1); |
241 tokenHead.previous = tokenHead; | |
242 Token tokenTail = tokenHead; | 186 Token tokenTail = tokenHead; |
243 | 187 |
244 // Both fasta and analyzer have links from a "BeginToken" to its matching | 188 // Both fasta and analyzer have links from a "BeginToken" to its matching |
245 // "EndToken" in a group (like parentheses and braces). However, only fasta | 189 // "EndToken" in a group (like parentheses and braces). However, only fasta |
246 // makes these links for angle brackets. We use these stacks to map the | 190 // makes these links for angle brackets. We use these stacks to map the |
247 // links from the analyzer token stream into equivalent links in the fasta | 191 // links from the analyzer token stream into equivalent links in the fasta |
248 // token stream, and to create the links that fasta expects for angle | 192 // token stream, and to create the links that fasta expects for angle |
249 // brackets. | 193 // brackets. |
250 | 194 |
251 // Note: beginTokenStack and endTokenStack are seeded with a sentinel value | 195 // Note: beginTokenStack and endTokenStack are seeded with a sentinel value |
(...skipping 54 matching lines...)
306 tokenTail.next = token; | 250 tokenTail.next = token; |
307 tokenTail.next.previousToken = tokenTail; | 251 tokenTail.next.previousToken = tokenTail; |
308 tokenTail = token; | 252 tokenTail = token; |
309 matchGroups(analyzerToken, token); | 253 matchGroups(analyzerToken, token); |
310 return analyzerToken.next; | 254 return analyzerToken.next; |
311 } | 255 } |
312 | 256 |
313 while (true) { | 257 while (true) { |
314 // TODO(paulberry): join up begingroup/endgroup. | 258 // TODO(paulberry): join up begingroup/endgroup. |
315 if (analyzerToken.type == TokenType.EOF) { | 259 if (analyzerToken.type == TokenType.EOF) { |
316 tokenTail.next = new SymbolToken(EOF_INFO, analyzerToken.offset); | 260 tokenTail.next = new SymbolToken.eof(analyzerToken.offset); |
317 tokenTail.next.previousToken = tokenTail; | 261 tokenTail.next.previousToken = tokenTail; |
318 tokenTail.next.precedingCommentTokens = | 262 tokenTail.next.precedingCommentTokens = |
319 translateComments(analyzerToken.precedingComments); | 263 translateComments(analyzerToken.precedingComments); |
320 tokenTail.next.next = tokenTail.next; | 264 tokenTail.next.next = tokenTail.next; |
321 return tokenHead.next; | 265 return tokenHead.next; |
322 } | 266 } |
323 analyzerToken = translateAndAppend(analyzerToken); | 267 analyzerToken = translateAndAppend(analyzerToken); |
324 } | 268 } |
325 } | 269 } |
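Review note: the stack comment above describes the same technique _matchGroups uses in the other direction: push each begin token together with the token expected to close its group, then pop and link when that end token arrives (with a sentinel at the bottom so the stacks are never empty). A self-contained sketch of the idea, using illustrative names and plain offsets instead of the real token classes:

    class _OpenGroup {
      final int beginOffset;
      final int expectedEndOffset;
      _OpenGroup(this.beginOffset, this.expectedEndOffset);
    }

    final List<_OpenGroup> _openGroups = <_OpenGroup>[];
    final Map<int, int> _groupLinks = <int, int>{}; // beginOffset -> endOffset

    void openGroup(int beginOffset, int expectedEndOffset) {
      _openGroups.add(new _OpenGroup(beginOffset, expectedEndOffset));
    }

    void sawToken(int offset) {
      // If this token sits where the innermost open group expects its end
      // token, record the begin/end link and pop the group.
      if (_openGroups.isNotEmpty &&
          _openGroups.last.expectedEndOffset == offset) {
        var group = _openGroups.removeLast();
        _groupLinks[group.beginOffset] = offset;
      }
    }

The real code additionally skips groups whose synthetic end token shares the begin token's offset (see _matchGroups above) and creates the angle-bracket links that only fasta maintains.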
326 | 270 |
(...skipping 172 matching lines...)
499 return symbol(BACKSLASH_INFO); | 443 return symbol(BACKSLASH_INFO); |
500 case TokenType.PERIOD_PERIOD_PERIOD: | 444 case TokenType.PERIOD_PERIOD_PERIOD: |
501 return symbol(PERIOD_PERIOD_PERIOD_INFO); | 445 return symbol(PERIOD_PERIOD_PERIOD_INFO); |
502 // case TokenType.GENERIC_METHOD_TYPE_ASSIGN | 446 // case TokenType.GENERIC_METHOD_TYPE_ASSIGN |
503 // case TokenType.GENERIC_METHOD_TYPE_LIST | 447 // case TokenType.GENERIC_METHOD_TYPE_LIST |
504 default: | 448 default: |
505 return internalError('Unhandled token type ${token.type}'); | 449 return internalError('Unhandled token type ${token.type}'); |
506 } | 450 } |
507 } | 451 } |
508 | 452 |
509 /// Determines whether the given [charOffset], which came from the non-EOF token | |
510 /// [token], represents the end of the input. | |
511 bool _isAtEnd(Token token, int charOffset) { | |
512 while (true) { | |
513 // Skip to the next token. | |
514 token = token.next; | |
515 // If we've found an EOF token, its charOffset indicates where the end of | |
516 // the input is. | |
517 if (token.isEof) return token.charOffset == charOffset; | |
518 // If we've found a non-error token, then we know there is additional input | |
519 // text after [charOffset]. | |
520 if (token.info.kind != BAD_INPUT_TOKEN) return false; | |
521 // Otherwise keep looking. | |
522 } | |
523 } | |
524 | |
525 analyzer.Token toAnalyzerToken(Token token, | 453 analyzer.Token toAnalyzerToken(Token token, |
526 [analyzer.CommentToken commentToken]) { | 454 [analyzer.CommentToken commentToken]) { |
527 if (token == null) return null; | 455 if (token == null) return null; |
528 analyzer.Token makeStringToken(TokenType tokenType) { | 456 analyzer.Token makeStringToken(TokenType tokenType) { |
529 if (commentToken == null) { | 457 if (commentToken == null) { |
530 return new analyzer.StringToken( | 458 return new analyzer.StringToken( |
531 tokenType, token.lexeme, token.charOffset); | 459 tokenType, token.lexeme, token.charOffset); |
532 } else { | 460 } else { |
533 return new analyzer.StringTokenWithComment( | 461 return new analyzer.StringTokenWithComment( |
534 tokenType, token.lexeme, token.charOffset, commentToken); | 462 tokenType, token.lexeme, token.charOffset, commentToken); |
(...skipping 269 matching lines...)
804 case PERIOD_PERIOD_PERIOD_TOKEN: | 732 case PERIOD_PERIOD_PERIOD_TOKEN: |
805 return TokenType.PERIOD_PERIOD_PERIOD; | 733 return TokenType.PERIOD_PERIOD_PERIOD; |
806 // case GENERIC_METHOD_TYPE_LIST_TOKEN: | 734 // case GENERIC_METHOD_TYPE_LIST_TOKEN: |
807 // return TokenType.GENERIC_METHOD_TYPE_LIST; | 735 // return TokenType.GENERIC_METHOD_TYPE_LIST; |
808 // case GENERIC_METHOD_TYPE_ASSIGN_TOKEN: | 736 // case GENERIC_METHOD_TYPE_ASSIGN_TOKEN: |
809 // return TokenType.GENERIC_METHOD_TYPE_ASSIGN; | 737 // return TokenType.GENERIC_METHOD_TYPE_ASSIGN; |
810 default: | 738 default: |
811 return internalError("Unhandled token ${token.info}"); | 739 return internalError("Unhandled token ${token.info}"); |
812 } | 740 } |
813 } | 741 } |
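Taken together, the two directions give a (lossy) round trip between the token representations. A usage sketch, assuming toAnalyzerTokenStream (referenced in the doc comment on fromAnalyzerTokenStream) takes the head fasta Token and returns the head analyzer token; the parameter name below is illustrative:

    void roundTripExample(Token fastaTokens) {
      analyzer.Token analyzerTokens = toAnalyzerTokenStream(fastaTokens);
      Token roundTripped = fromAnalyzerTokenStream(analyzerTokens);
      // Per the TODO on fromAnalyzerTokenStream, scan errors do not survive
      // the trip: analyzer tokens carry no error information, so any
      // ErrorTokens present in `fastaTokens` will be absent here.
    }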