Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2017, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2017, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 library dart_scanner.recover; | 5 library dart_scanner.recover; |
| 6 | 6 |
| 7 import 'token.dart' show | 7 import 'token.dart' show |
| 8 StringToken, | |
| 8 Token; | 9 Token; |
| 9 | 10 |
| 11 import 'error_token.dart' show | |
| 12 NonAsciiIdentifierToken, | |
| 13 ErrorKind, | |
| 14 ErrorToken; | |
| 15 | |
| 16 import 'precedence.dart' as Precedence; | |
| 17 | |
| 18 import 'precedence.dart' show | |
| 19 PrecedenceInfo; | |
| 20 | |
| 10 /// Recover from errors in [tokens]. The original sources are provided as | 21 /// Recover from errors in [tokens]. The original sources are provided as |
| 11 /// [bytes]. [lineStarts] are the beginning character offsets of lines, and | 22 /// [bytes]. [lineStarts] are the beginning character offsets of lines, and |
| 12 /// must be updated if recovery is performed rewriting the original source | 23 /// must be updated if recovery is performed rewriting the original source |
| 13 /// code. | 24 /// code. |
| 14 Token defaultRecoveryStrategy( | 25 Token defaultRecoveryStrategy( |
| 15 List<int> bytes, Token tokens, List<int> lineStarts) { | 26 List<int> bytes, Token tokens, List<int> lineStarts) { |
| 16 // See [Parser.reportErrorToken](package:dart_parser/src/parser.dart) for how | 27 // See [Parser.reportErrorToken](package:dart_parser/src/parser.dart) for how |
| 17 // it currently handles lexical errors. In addition, notice how the parser | 28 // it currently handles lexical errors. In addition, notice how the parser |
| 18 // calls [handleInvalidExpression], [handleInvalidFunctionBody], and | 29 // calls [handleInvalidExpression], [handleInvalidFunctionBody], and |
| 19 // [handleInvalidTypeReference] to allow the listener to recover its internal | 30 // [handleInvalidTypeReference] to allow the listener to recover its internal |
| 20 // state. See [package:compiler/src/parser/element_listener.dart] for an | 31 // state. See [package:compiler/src/parser/element_listener.dart] for an |
| 21 // example of how these events are used. | 32 // example of how these events are used. |
| 22 // | 33 // |
| 23 // In addition, the scanner will attempt a bit of recovery when braces don't | 34 // In addition, the scanner will attempt a bit of recovery when braces don't |
| 24 // match up during brace grouping. See | 35 // match up during brace grouping. See |
| 25 // [ArrayBasedScanner.discardBeginGroupUntil](array_based_scanner.dart). For | 36 // [ArrayBasedScanner.discardBeginGroupUntil](array_based_scanner.dart). For |
| 26 // more details on brace grouping see | 37 // more details on brace grouping see |
| 27 // [AbstractScanner.unmatchedBeginGroup](abstract_scanner.dart). | 38 // [AbstractScanner.unmatchedBeginGroup](abstract_scanner.dart). |
| 28 return tokens; | 39 |
| 40 ErrorToken error; | |
| 41 ErrorToken errorTail; | |
| 42 Token good; | |
| 43 Token goodTail; | |
| 44 Token beforeGoodTail; | |
|
> **Johnni Winther** (2017/01/30 09:04:38): Add doc to these variables.
>
> **ahe** (2017/01/30 13:26:22): Done.
| |
| 45 | |
| 46 recoverIdentifier(NonAsciiIdentifierToken first) { | |
| 47 List codeUnits = <int>[]; | |
| 48 | |
| 49 // True if the previous good token is an identifier and ends right where | |
| 50 // [first] starts. This is the case for input like `blåbærgrød`. In this | |
| 51 // case, the scanner produces this sequence of tokens: | |
| 52 // | |
| 53 // [ | |
| 54 // StringToken("bl"), | |
| 55 // NonAsciiIdentifierToken("å"), | |
| 56 // StringToken("b"), | |
| 57 // NonAsciiIdentifierToken("æ"), | |
| 58 // StringToken("rgr"), | |
| 59 // NonAsciiIdentifierToken("ø"), | |
| 60 // StringToken("d"), | |
| 61 // EOF, | |
| 62 // ] | |
| 63 bool prepend = false; | |
| 64 | |
| 65 // True if following token is also an identifier that starts right where | |
| 66 // [errorTail] ends. This is the case for "b" above. | |
| 67 bool append = false; | |
| 68 if (goodTail != null) { | |
| 69 if (goodTail.info == Precedence.IDENTIFIER_INFO && | |
| 70 goodTail.charEnd == first.charOffset) { | |
| 71 prepend = true; | |
| 72 } | |
| 73 } | |
| 74 Token next = errorTail.next; | |
| 75 if (next.info == Precedence.IDENTIFIER_INFO && | |
| 76 errorTail.charOffset + 1 == next.charOffset) { | |
| 77 append = true; | |
| 78 } | |
| 79 if (prepend) { | |
| 80 codeUnits.addAll(goodTail.value.codeUnits); | |
| 81 } | |
| 82 NonAsciiIdentifierToken current = first; | |
| 83 while (current != errorTail) { | |
| 84 codeUnits.add(current.character); | |
| 85 current = current.next; | |
| 86 } | |
| 87 codeUnits.add(errorTail.character); | |
| 88 int charOffset = first.charOffset; | |
| 89 if (prepend) { | |
| 90 charOffset = goodTail.charOffset; | |
| 91 if (beforeGoodTail == null) { | |
| 92 // We're prepending the first good token, so the new token will become | |
| 93 // the first good tooken. | |
|
> **Johnni Winther** (2017/01/30 09:04:38): tooken -> token
>
> **ahe** (2017/01/30 13:26:22): Done.
| |
| 94 good = null; | |
| 95 goodTail = null; | |
| 96 beforeGoodTail = null; | |
| 97 } else { | |
| 98 goodTail = beforeGoodTail; | |
| 99 } | |
| 100 } | |
| 101 if (append) { | |
| 102 codeUnits.addAll(next.value.codeUnits); | |
| 103 next = next.next; | |
| 104 } | |
| 105 String value = new String.fromCharCodes(codeUnits); | |
| 106 Token recovered = synthesizeToken( | |
| 107 charOffset, value, Precedence.IDENTIFIER_INFO); | |
| 108 recovered.next = next; | |
| 109 return recovered; | |
| 110 } | |
| 111 | |
| 112 recoverExponent() { | |
| 113 return synthesizeToken(errorTail.charOffset, "NaN", Precedence.DOUBLE_INFO); | |
| 114 } | |
| 115 | |
| 116 recoverString() { | |
| 117 // TODO(ahe): Improve this. | |
| 118 return skipToEof(errorTail); | |
| 119 } | |
| 120 | |
| 121 recoverHexDigit() { | |
| 122 return synthesizeToken(errorTail.charOffset, "-1", Precedence.INT_INFO); | |
| 123 } | |
| 124 | |
| 125 recoverStringInterpolation() { | |
| 126 // TODO(ahe): Improve this. | |
| 127 return skipToEof(errorTail); | |
| 128 } | |
| 129 | |
| 130 recoverComment() { | |
| 131 // TODO(ahe): Improve this. | |
| 132 return skipToEof(errorTail); | |
| 133 } | |
| 134 | |
| 135 recoverUnmatched() { | |
| 136 // TODO(ahe): Try to use top-level keywords (such as `class`, `typedef`, | |
| 137 // and `enum`) and indentation to recover. | |
| 138 return errorTail; | |
| 139 } | |
| 140 | |
| 141 for (Token current = tokens; !current.isEof; current = current.next) { | |
| 142 if (current is ErrorToken) { | |
| 143 ErrorToken first = current; | |
| 144 Token next = current; | |
| 145 bool treatAsWhitespace = false; | |
| 146 do { | |
| 147 current = next; | |
| 148 if (errorTail == null) { | |
| 149 error = next; | |
| 150 } else { | |
| 151 errorTail.next = next; | |
| 152 } | |
| 153 errorTail = next; | |
| 154 next = next.next; | |
| 155 } while (next is ErrorToken && first.errorCode == next.errorCode); | |
| 156 | |
| 157 switch (first.errorCode) { | |
| 158 case ErrorKind.Encoding: | |
| 159 case ErrorKind.NonAsciiWhitespace: | |
| 160 case ErrorKind.AsciiControlCharacter: | |
| 161 treatAsWhitespace = true; | |
| 162 break; | |
| 163 | |
| 164 case ErrorKind.NonAsciiIdentifier: | |
| 165 current = recoverIdentifier(first); | |
| 166 break; | |
| 167 | |
| 168 case ErrorKind.MissingExponent: | |
| 169 current = recoverExponent(); | |
| 170 break; | |
| 171 | |
| 172 case ErrorKind.UnterminatedString: | |
| 173 current = recoverString(); | |
| 174 break; | |
| 175 | |
| 176 case ErrorKind.ExpectedHexDigit: | |
| 177 current = recoverHexDigit(); | |
| 178 break; | |
| 179 | |
| 180 case ErrorKind.UnexpectedDollarInString: | |
| 181 current = recoverStringInterpolation(); | |
| 182 break; | |
| 183 | |
| 184 case ErrorKind.UnterminatedComment: | |
| 185 current = recoverComment(); | |
| 186 break; | |
| 187 | |
| 188 case ErrorKind.UnmatchedToken: | |
| 189 current = recoverUnmatched(); | |
| 190 break; | |
| 191 | |
| 192 case ErrorKind.UnterminatedToken: // TODO(ahe): Can this happen? | |
| 193 default: | |
| 194 treatAsWhitespace = true; | |
| 195 break; | |
| 196 } | |
| 197 if (treatAsWhitespace) continue; | |
| 198 } | |
| 199 if (goodTail == null) { | |
| 200 good = current; | |
| 201 } else { | |
| 202 goodTail.next = current; | |
| 203 } | |
| 204 beforeGoodTail = goodTail; | |
| 205 goodTail = current; | |
| 206 } | |
| 207 | |
| 208 errorTail.next = good; | |
| 209 return error; | |
| 29 } | 210 } |
| 211 | |
| 212 Token synthesizeToken(int charOffset, String value, PrecedenceInfo info) { | |
| 213 return new StringToken.fromString(info, value, charOffset); | |
| 214 } | |
| 215 | |
| 216 Token skipToEof(Token token) { | |
| 217 while (!token.isEof) { | |
| 218 token = token.next; | |
| 219 } | |
| 220 return token; | |
| 221 } | |
| 222 | |
| 223 String closeBraceFor(String openBrace) { | |
| 224 return const { | |
| 225 '(': ')', | |
| 226 '[': ']', | |
| 227 '{': '}', | |
| 228 '<': '>', | |
| 229 r'${': '}', | |
| 230 }[openBrace]; | |
| 231 } | |
| OLD | NEW |