| OLD | NEW |
| 1 // Copyright (c) 2017, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2017, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 library fasta.scanner.recover; | 5 library fasta.scanner.recover; |
| 6 | 6 |
| 7 import 'token.dart' show | 7 import 'token.dart' show |
| 8 StringToken, |
| 8 Token; | 9 Token; |
| 9 | 10 |
| 11 import 'error_token.dart' show |
| 12 NonAsciiIdentifierToken, |
| 13 ErrorKind, |
| 14 ErrorToken; |
| 15 |
| 16 import 'precedence.dart' as Precedence; |
| 17 |
| 18 import 'precedence.dart' show |
| 19 PrecedenceInfo; |
| 20 |
/// Recover from errors in [tokens]. The original sources are provided as
/// [bytes]. [lineStarts] are the beginning character offsets of lines, and
/// must be updated if recovery is performed rewriting the original source
/// code.
///
/// The token stream is rewired in place (by assigning `next`) into two singly
/// linked lists: the error tokens, in the order they were encountered, and
/// the remaining "good" tokens, which always end with the EOF token. The
/// result is the error list with the good list appended to it. If [tokens]
/// contains no error tokens, [tokens] itself is returned.
///
/// [bytes] and [lineStarts] are not consulted by the current implementation.
Token defaultRecoveryStrategy(
    List<int> bytes, Token tokens, List<int> lineStarts) {
  // See [Parser.reportErrorToken] in
  // package:front_end/src/fasta/parser/src/parser.dart for how
  // it currently handles lexical errors. In addition, notice how the parser
  // calls [handleInvalidExpression], [handleInvalidFunctionBody], and
  // [handleInvalidTypeReference] to allow the listener to recover its internal
  // state. See [package:compiler/src/parser/element_listener.dart] for an
  // example of how these events are used.
  //
  // In addition, the scanner will attempt a bit of recovery when braces don't
  // match up during brace grouping. See
  // [ArrayBasedScanner.discardBeginGroupUntil](array_based_scanner.dart). For
  // more details on brace grouping see
  // [AbstractScanner.unmatchedBeginGroup](abstract_scanner.dart).

  /// Tokens with errors.
  ErrorToken error;

  /// Used for appending to [error].
  ErrorToken errorTail;

  /// Tokens without errors.
  Token good;

  /// Used for appending to [good].
  Token goodTail;

  /// The previous token appended to [good]. Since tokens are single linked
  /// lists, this allows us to rewrite the current token without scanning all
  /// of [good]. This is supposed to be the token immediately before
  /// [goodTail], that is, `beforeGoodTail.next == goodTail`.
  Token beforeGoodTail;

  /// Merges the run of non-ASCII identifier errors from [first] to
  /// [errorTail], plus any directly adjacent identifier tokens, into a single
  /// synthesized identifier token whose `next` is the first token following
  /// the merged range.
  Token recoverIdentifier(NonAsciiIdentifierToken first) {
    List<int> codeUnits = <int>[];

    // True if the previous good token is an identifier and ends right where
    // [first] starts. This is the case for input like `blåbærgrød`. In this
    // case, the scanner produces this sequence of tokens:
    //
    //     [
    //        StringToken("bl"),
    //        NonAsciiIdentifierToken("å"),
    //        StringToken("b"),
    //        NonAsciiIdentifierToken("æ"),
    //        StringToken("rgr"),
    //        NonAsciiIdentifierToken("ø"),
    //        StringToken("d"),
    //        EOF,
    //     ]
    bool prepend = false;

    // True if following token is also an identifier that starts right where
    // [errorTail] ends. This is the case for "b" above.
    bool append = false;
    if (goodTail != null) {
      if (goodTail.info == Precedence.IDENTIFIER_INFO &&
          goodTail.charEnd == first.charOffset) {
        prepend = true;
      }
    }
    Token next = errorTail.next;
    if (next.info == Precedence.IDENTIFIER_INFO &&
        errorTail.charOffset + 1 == next.charOffset) {
      append = true;
    }
    if (prepend) {
      codeUnits.addAll(goodTail.value.codeUnits);
    }
    NonAsciiIdentifierToken current = first;
    while (current != errorTail) {
      codeUnits.add(current.character);
      current = current.next;
    }
    codeUnits.add(errorTail.character);
    int charOffset = first.charOffset;
    if (prepend) {
      charOffset = goodTail.charOffset;
      if (beforeGoodTail == null) {
        // We're prepending the first good token, so the new token will become
        // the first good token.
        good = null;
        goodTail = null;
        beforeGoodTail = null;
      } else {
        // Drop the old tail; the caller appends the merged token after
        // [beforeGoodTail] instead.
        goodTail = beforeGoodTail;
      }
    }
    if (append) {
      codeUnits.addAll(next.value.codeUnits);
      next = next.next;
    }
    String value = new String.fromCharCodes(codeUnits);
    Token recovered =
        synthesizeToken(charOffset, value, Precedence.IDENTIFIER_INFO);
    recovered.next = next;
    return recovered;
  }

  /// Replaces a missing-exponent error with a synthetic "NaN" double token.
  Token recoverExponent() {
    // The synthesized token must be linked to the rest of the stream;
    // otherwise the main loop would step onto a null `next`.
    return synthesizeToken(errorTail.charOffset, "NaN", Precedence.DOUBLE_INFO)
      ..next = errorTail.next;
  }

  /// Gives up on the rest of the input after an unterminated string.
  Token recoverString() {
    // TODO(ahe): Improve this.
    return skipToEof(errorTail);
  }

  /// Replaces an expected-hex-digit error with a synthetic "-1" int token.
  Token recoverHexDigit() {
    // See [recoverExponent] for why `next` is assigned here.
    return synthesizeToken(errorTail.charOffset, "-1", Precedence.INT_INFO)
      ..next = errorTail.next;
  }

  /// Gives up on the rest of the input after a bad string interpolation.
  Token recoverStringInterpolation() {
    // TODO(ahe): Improve this.
    return skipToEof(errorTail);
  }

  /// Gives up on the rest of the input after an unterminated comment.
  Token recoverComment() {
    // TODO(ahe): Improve this.
    return skipToEof(errorTail);
  }

  // The loop deliberately has no condition: the EOF token itself has to be
  // appended to [good] so that the good list is always properly terminated,
  // even when the last tokens before EOF were errors that got dropped.
  for (Token current = tokens;; current = current.next) {
    if (current is ErrorToken) {
      ErrorToken first = current;
      Token next = current;
      bool treatAsWhitespace = false;
      // Move the whole run of consecutive errors of the same kind to the
      // error list. Afterwards [current] and [errorTail] are the last error
      // in the run and [next] is the token following it.
      do {
        current = next;
        if (errorTail == null) {
          error = next;
        } else {
          errorTail.next = next;
        }
        errorTail = next;
        next = next.next;
      } while (next is ErrorToken && first.errorCode == next.errorCode);

      switch (first.errorCode) {
        case ErrorKind.Encoding:
        case ErrorKind.NonAsciiWhitespace:
        case ErrorKind.AsciiControlCharacter:
          treatAsWhitespace = true;
          break;

        case ErrorKind.NonAsciiIdentifier:
          current = recoverIdentifier(first);
          break;

        case ErrorKind.MissingExponent:
          current = recoverExponent();
          break;

        case ErrorKind.UnterminatedString:
          current = recoverString();
          break;

        case ErrorKind.ExpectedHexDigit:
          current = recoverHexDigit();
          break;

        case ErrorKind.UnexpectedDollarInString:
          current = recoverStringInterpolation();
          break;

        case ErrorKind.UnterminatedComment:
          current = recoverComment();
          break;

        case ErrorKind.UnmatchedToken:
          // TODO(ahe): Try to use top-level keywords (such as `class`,
          // `typedef`, and `enum`) and indentation to recover.
          //
          // Until then the error is simply dropped from the good tokens. The
          // error token must not be appended to [good]: it is already part
          // of the error list, and sharing it between both lists corrupts
          // them (the final `errorTail.next = good` would form a cycle).
          treatAsWhitespace = true;
          break;

        case ErrorKind.UnterminatedToken: // TODO(ahe): Can this happen?
        default:
          treatAsWhitespace = true;
          break;
      }
      // Skipped errors contribute nothing to [good]; the loop update then
      // continues with the token following the error run.
      if (treatAsWhitespace) continue;
    }
    if (goodTail == null) {
      good = current;
    } else {
      goodTail.next = current;
    }
    beforeGoodTail = goodTail;
    goodTail = current;
    if (current.isEof) break;
  }

  // Without errors the loop only re-assigned links that were already in
  // place, so the input stream can be returned unchanged.
  if (error == null) return tokens;

  errorTail.next = good;
  return error;
}
| 221 |
/// Builds a replacement token from [info], [value] and [charOffset] by
/// delegating to [StringToken.fromString].
Token synthesizeToken(int charOffset, String value, PrecedenceInfo info) =>
    new StringToken.fromString(info, value, charOffset);
| 225 |
/// Follows the `next` links starting at [token] and returns the first token
/// for which `isEof` is true. Returns [token] itself if it already is EOF.
Token skipToEof(Token token) {
  Token cursor = token;
  for (; !cursor.isEof; cursor = cursor.next) {
    // Nothing to do; the loop header advances the cursor.
  }
  return cursor;
}
| 232 |
/// Returns the closing delimiter that matches [openBrace], or `null` if
/// [openBrace] is not one of `(`, `[`, `{`, `<` or `${`.
String closeBraceFor(String openBrace) {
  switch (openBrace) {
    case '(':
      return ')';
    case '[':
      return ']';
    case '{':
    case r'${':
      // Both a plain block and a string interpolation end with `}`.
      return '}';
    case '<':
      return '>';
    default:
      return null;
  }
}
| OLD | NEW |