Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 library fasta.quote; | |
| 6 | |
| 7 import 'errors.dart' show | |
| 8 inputError, | |
| 9 internalError; | |
| 10 | |
| 11 import 'package:dart_scanner/src/characters.dart' show | |
| 12 $BACKSLASH, | |
| 13 $BS, | |
| 14 $CLOSE_CURLY_BRACKET, | |
| 15 $CR, | |
| 16 $FF, | |
| 17 $LF, | |
| 18 $OPEN_CURLY_BRACKET, | |
| 19 $SPACE, | |
| 20 $TAB, | |
| 21 $VTAB, | |
| 22 $b, | |
| 23 $f, | |
| 24 $n, | |
| 25 $r, | |
| 26 $t, | |
| 27 $u, | |
| 28 $v, | |
| 29 $x, | |
| 30 hexDigitValue, | |
| 31 isHexDigit; | |
| 32 | |
/// The kinds of quotation a Dart string literal can be written with.
///
/// The "Raw" variants correspond to literals prefixed with `r`; the
/// "MultiLine" variants correspond to literals delimited by three quote
/// characters.
enum Quote {
  /// `'...'`
  Single,

  /// `"..."`
  Double,

  /// `'''...'''`
  MultiLineSingle,

  /// `"""..."""`
  MultiLineDouble,

  /// `r'...'`
  RawSingle,

  /// `r"..."`
  RawDouble,

  /// `r'''...'''`
  RawMultiLineSingle,

  /// `r"""..."""`
  RawMultiLineDouble,
}
| 43 | |
/// Determines which [Quote] style the string literal text [first] opens with.
///
/// Triple-quote prefixes are tested before single-quote prefixes because a
/// triple-quoted literal also starts with a single quote character.
/// If no known opening is found, the result of `internalError` is returned.
Quote analyzeQuote(String first) {
  Quote kind;
  if (first.startsWith('"""')) {
    kind = Quote.MultiLineDouble;
  } else if (first.startsWith('r"""')) {
    kind = Quote.RawMultiLineDouble;
  } else if (first.startsWith("'''")) {
    kind = Quote.MultiLineSingle;
  } else if (first.startsWith("r'''")) {
    kind = Quote.RawMultiLineSingle;
  } else if (first.startsWith('"')) {
    kind = Quote.Double;
  } else if (first.startsWith('r"')) {
    kind = Quote.RawDouble;
  } else if (first.startsWith("'")) {
    kind = Quote.Single;
  } else if (first.startsWith("r'")) {
    kind = Quote.RawSingle;
  } else {
    return internalError("Unexpected string literal: $first");
  }
  return kind;
}
| 55 | |
/// Returns the index in [first] at which the content of a multi-line string
/// begins, given that the opening quote ends at index [start].
///
/// If everything between [start] and the first line break is a space or a
/// tab (each optionally preceded by a backslash), that whitespace and the
/// line break itself are skipped and the index just past the line break is
/// returned. A line break is `\n`, `\r`, or `\r\n`.
///
/// In every other case (a non-whitespace character is found before a line
/// break, or the text ends first) nothing is skipped and [start] is returned.
int lengthOfOptionalWhitespacePrefix(String first, int start) {
  List<int> codeUnits = first.codeUnits;
  for (int i = start; i < codeUnits.length; i++) {
    int code = codeUnits[i];
    if (code == $BACKSLASH) {
      // A backslash may prefix the whitespace; look at the escaped character.
      i++;
      if (i < codeUnits.length) {
        code = codeUnits[i];
      } else {
        break;
      }
    }
    if (code == $TAB || code == $SPACE) continue;
    if (code == $CR) {
      // Treat CR LF as a single line break: inspect the unit *after* the CR.
      if (i + 1 < codeUnits.length && codeUnits[i + 1] == $LF) {
        i++;
      }
      return i + 1;
    }
    if (code == $LF) {
      return i + 1;
    }
    // Any other character means the first line is not whitespace-only, so
    // nothing may be stripped. Without this break the scan would run on to a
    // later line break and drop real string content.
    break;
  }
  return start;
}
| 81 | |
/// Returns how many leading code units of [first] belong to the opening
/// delimiter for the given [quote] style.
///
/// Single-line literals have a fixed-size opening (1, or 2 when raw). For
/// multi-line literals the result comes from
/// [lengthOfOptionalWhitespacePrefix], starting after the three quote
/// characters (and the `r` prefix when raw).
int firstQuoteLength(String first, Quote quote) {
  if (quote == Quote.Single || quote == Quote.Double) {
    return 1;
  }
  if (quote == Quote.MultiLineSingle || quote == Quote.MultiLineDouble) {
    return lengthOfOptionalWhitespacePrefix(first, 3);
  }
  if (quote == Quote.RawSingle || quote == Quote.RawDouble) {
    return 2;
  }
  if (quote == Quote.RawMultiLineSingle || quote == Quote.RawMultiLineDouble) {
    return lengthOfOptionalWhitespacePrefix(first, 4);
  }
  return internalError("Unhandled string quote: $quote");
}
| 102 | |
/// Returns the number of code units in the closing delimiter for [quote]:
/// 1 for single-line styles and 3 for multi-line styles, raw or not.
int lastQuoteLength(Quote quote) {
  bool isSingleLine = quote == Quote.Single ||
      quote == Quote.Double ||
      quote == Quote.RawSingle ||
      quote == Quote.RawDouble;
  if (isSingleLine) return 1;

  bool isMultiLine = quote == Quote.MultiLineSingle ||
      quote == Quote.MultiLineDouble ||
      quote == Quote.RawMultiLineSingle ||
      quote == Quote.RawMultiLineDouble;
  if (isMultiLine) return 3;

  return internalError("Unhandled string quote: $quote");
}
| 119 | |
/// Strips the opening delimiter (as measured by [firstQuoteLength]) from
/// [first] and unescapes what remains according to [quote].
String unescapeFirstStringPart(String first, Quote quote) {
  int openingLength = firstQuoteLength(first, quote);
  String content = first.substring(openingLength);
  return unescape(content, quote);
}
| 123 | |
/// Strips the closing delimiter (as measured by [lastQuoteLength]) from
/// [last] and unescapes what remains according to [quote].
String unescapeLastStringPart(String last, Quote quote) {
  int contentEnd = last.length - lastQuoteLength(quote);
  String content = last.substring(0, contentEnd);
  return unescape(content, quote);
}
| 128 | |
/// Unescapes a complete string literal [string], delimiters included.
///
/// The quote style is detected with [analyzeQuote]; the opening and closing
/// delimiters are then removed and the content is passed to [unescape].
String unescapeString(String string) {
  Quote quote = analyzeQuote(string);
  int contentStart = firstQuoteLength(string, quote);
  int contentEnd = string.length - lastQuoteLength(quote);
  String content = string.substring(contentStart, contentEnd);
  return unescape(content, quote);
}
| 135 | |
/// Returns [string] with its escape sequences decoded, as appropriate for
/// the [quote] style.
///
/// Raw styles are returned untouched. For the other styles, [string] is
/// returned as-is when it contains no backslash; otherwise its code units
/// are decoded by [unescapeCodeUnits].
String unescape(String string, Quote quote) {
  bool isRaw = quote == Quote.RawSingle ||
      quote == Quote.RawDouble ||
      quote == Quote.RawMultiLineSingle ||
      quote == Quote.RawMultiLineDouble;
  if (isRaw) {
    return string;
  }
  if (string.contains("\\")) {
    return unescapeCodeUnits(string.codeUnits);
  }
  return string;
}
| 152 | |
/// Message reported when the input ends in the middle of an escape sequence.
const String incompleteSequence = "Incomplete escape sequence.";

/// Message reported when an escape sequence contains a character that is not
/// allowed at that position (for example a non-hex digit).
const String invalidCharacter = "Invalid character in escape sequence.";

/// Message reported when an escape denotes a value above 0x10FFFF.
const String invalidCodePoint = "Invalid code point.";
| 158 | |
/// Decodes the backslash escape sequences in [codeUnits] and returns the
/// resulting string.
///
/// Recognized escapes:
///
/// * `\n` for newline, equivalent to `\x0A`.
/// * `\r` for carriage return, equivalent to `\x0D`.
/// * `\f` for form feed, equivalent to `\x0C`.
/// * `\b` for backspace, equivalent to `\x08`.
/// * `\t` for tab, equivalent to `\x09`.
/// * `\v` for vertical tab, equivalent to `\x0B`.
/// * `\xXX` for hex escape (exactly 2 hex digits).
/// * `\uXXXX` (exactly 4 hex digits) or `\u{X...}` (1-6 hex digits) for
///   Unicode hex escape.
///
/// Any other escaped character is passed through with the backslash removed.
///
/// Malformed escapes are reported through `inputError` using
/// [incompleteSequence], [invalidCharacter] or [invalidCodePoint].
String unescapeCodeUnits(List<int> codeUnits) {
  // Can't use Uint8List or Uint16List here, the code units may be larger.
  // Every escape consumes at least two input units and yields one value, so
  // the input length is always a sufficient capacity.
  List<int> result = new List<int>(codeUnits.length);
  int resultOffset = 0;
  // Note: [offset] is accepted for call-site clarity but is currently not
  // forwarded to `inputError`. Presumably `inputError` does not return
  // normally; if it does, callers of this function receive null.
  error(int offset, String message) {
    inputError(null, null, message);
  }
  for (int i = 0; i < codeUnits.length; i++) {
    int code = codeUnits[i];
    if (code == $BACKSLASH) {
      if (codeUnits.length == ++i) return error(i, incompleteSequence);
      code = codeUnits[i];
      if (code == $n) {
        code = $LF;
      } else if (code == $r) {
        code = $CR;
      } else if (code == $f) {
        code = $FF;
      } else if (code == $b) {
        code = $BS;
      } else if (code == $t) {
        code = $TAB;
      } else if (code == $v) {
        code = $VTAB;
      } else if (code == $x) {
        // Expect exactly 2 hex digits.
        if (codeUnits.length <= i + 2) return error(i, incompleteSequence);
        code = 0;
        for (int j = 0; j < 2; j++) {
          int digit = codeUnits[++i];
          if (!isHexDigit(digit)) return error(i, invalidCharacter);
          code = (code << 4) + hexDigitValue(digit);
        }
      } else if (code == $u) {
        if (codeUnits.length == i + 1) return error(i, incompleteSequence);
        if (codeUnits[i + 1] == $OPEN_CURLY_BRACKET) {
          // Expect 1-6 hex digits followed by '}'.
          i++; // Step onto the '{'.
          code = 0;
          int digitCount = 0;
          while (true) {
            if (codeUnits.length == ++i) return error(i, incompleteSequence);
            int digit = codeUnits[i];
            // At least one digit is required before the closing bracket.
            if (digitCount != 0 && digit == $CLOSE_CURLY_BRACKET) break;
            // After six digits only '}' is acceptable; a seventh digit must
            // not be silently accepted with the '}' left in the output.
            if (digitCount == 6 || !isHexDigit(digit)) {
              return error(i, invalidCharacter);
            }
            code = (code << 4) + hexDigitValue(digit);
            digitCount++;
          }
        } else {
          // Expect exactly 4 hex digits. Check the length once up front,
          // mirroring the `\x` case above.
          if (codeUnits.length <= i + 4) return error(i, incompleteSequence);
          code = 0;
          for (int j = 0; j < 4; j++) {
            int digit = codeUnits[++i];
            if (!isHexDigit(digit)) return error(i, invalidCharacter);
            code = (code << 4) + hexDigitValue(digit);
          }
        }
      } else {
        // Nothing, escaped character is passed through;
      }
      // Only `\u{...}` can exceed the Unicode range, but a single check here
      // covers every escape form.
      if (code > 0x10FFFF) return error(i, invalidCodePoint);
    }
    result[resultOffset++] = code;
  }
  return new String.fromCharCodes(result, 0, resultOffset);
}
| OLD | NEW |