OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 library fasta.quote; |
| 6 |
| 7 import 'errors.dart' show |
| 8 inputError, |
| 9 internalError; |
| 10 |
| 11 import 'package:dart_scanner/src/characters.dart' show |
| 12 $BACKSLASH, |
| 13 $BS, |
| 14 $CLOSE_CURLY_BRACKET, |
| 15 $CR, |
| 16 $FF, |
| 17 $LF, |
| 18 $OPEN_CURLY_BRACKET, |
| 19 $SPACE, |
| 20 $TAB, |
| 21 $VTAB, |
| 22 $b, |
| 23 $f, |
| 24 $n, |
| 25 $r, |
| 26 $t, |
| 27 $u, |
| 28 $v, |
| 29 $x, |
| 30 hexDigitValue, |
| 31 isHexDigit; |
| 32 |
/// The kinds of string-literal quoting distinguished by [analyzeQuote].
///
/// The declaration order fixes each value's `index`, so it is kept unchanged.
enum Quote {
  /// Literal starting with `'`.
  Single,

  /// Literal starting with `"`.
  Double,

  /// Literal starting with `'''`.
  MultiLineSingle,

  /// Literal starting with `"""`.
  MultiLineDouble,

  /// Literal starting with `r'`.
  RawSingle,

  /// Literal starting with `r"`.
  RawDouble,

  /// Literal starting with `r'''`.
  RawMultiLineSingle,

  /// Literal starting with `r"""`.
  RawMultiLineDouble,
}
| 43 |
/// Classifies the quoting style of the string literal source text [first].
///
/// [first] is expected to begin with the literal's opening quote sequence,
/// optionally preceded by `r` for raw strings. Triple quotes are tested before
/// single quotes so that `'''` is not mistaken for `'`. If no known opening
/// sequence matches, the result of [internalError] is returned.
Quote analyzeQuote(String first) {
  // A raw literal only differs by its leading `r`; look at the quotes that
  // follow it.
  bool isRaw = first.startsWith("r");
  int quoteStart = isRaw ? 1 : 0;

  if (first.startsWith('"""', quoteStart)) {
    return isRaw ? Quote.RawMultiLineDouble : Quote.MultiLineDouble;
  }
  if (first.startsWith("'''", quoteStart)) {
    return isRaw ? Quote.RawMultiLineSingle : Quote.MultiLineSingle;
  }
  if (first.startsWith('"', quoteStart)) {
    return isRaw ? Quote.RawDouble : Quote.Double;
  }
  if (first.startsWith("'", quoteStart)) {
    return isRaw ? Quote.RawSingle : Quote.Single;
  }
  return internalError("Unexpected string literal: $first");
}
| 55 |
/// Returns the offset in [first] at which the string content begins, skipping
/// an optional run of whitespace that ends in a line terminator.
///
/// Scanning begins at [start] (the offset just past the opening quotes). If
/// everything from [start] up to and including the first line terminator
/// (`\n`, `\r` or `\r\n`) consists of spaces and tabs, the offset just past
/// that terminator is returned. Each of these characters may also be preceded
/// by a backslash. In every other case (a non-whitespace character is found,
/// or the input ends first) [start] is returned unchanged.
///
/// Note: based on [StringValidator.quotingFromString]
/// (pkg/compiler/lib/src/string_validator.dart).
int lengthOfOptionalWhitespacePrefix(String first, int start) {
  List<int> codeUnits = first.codeUnits;
  for (int i = start; i < codeUnits.length; i++) {
    int code = codeUnits[i];
    if (code == $BACKSLASH) {
      // A backslash is skipped and the character after it is examined instead.
      i++;
      if (i < codeUnits.length) {
        code = codeUnits[i];
      } else {
        break;
      }
    }
    if (code == $TAB || code == $SPACE) continue;
    if (code == $CR) {
      // Treat CR LF as a single line terminator: look at the character
      // *after* the CR (the one at `i` is the CR itself).
      if (i + 1 < codeUnits.length && codeUnits[i + 1] == $LF) {
        i++;
      }
      return i + 1;
    }
    if (code == $LF) {
      return i + 1;
    }
    break; // Not a white-space character.
  }
  return start;
}
| 84 |
/// Returns how many leading code units of [first] belong to the opening
/// delimiter for the given [quote] style.
///
/// Single-line literals have a fixed-size opener (1, or 2 when raw). For
/// multi-line literals the result is computed by
/// [lengthOfOptionalWhitespacePrefix], scanning from just past the opening
/// quotes (offset 3, or 4 when raw).
int firstQuoteLength(String first, Quote quote) {
  if (quote == Quote.Single || quote == Quote.Double) {
    return 1;
  }
  if (quote == Quote.MultiLineSingle || quote == Quote.MultiLineDouble) {
    return lengthOfOptionalWhitespacePrefix(first, 3);
  }
  if (quote == Quote.RawSingle || quote == Quote.RawDouble) {
    return 2;
  }
  if (quote == Quote.RawMultiLineSingle || quote == Quote.RawMultiLineDouble) {
    return lengthOfOptionalWhitespacePrefix(first, 4);
  }
  return internalError("Unhandled string quote: $quote");
}
| 105 |
/// Returns the number of code units in the closing delimiter for [quote]:
/// 1 for single-line styles and 3 for multi-line styles, raw or not.
int lastQuoteLength(Quote quote) {
  bool isSingleLine = quote == Quote.Single ||
      quote == Quote.Double ||
      quote == Quote.RawSingle ||
      quote == Quote.RawDouble;
  if (isSingleLine) return 1;

  bool isMultiLine = quote == Quote.MultiLineSingle ||
      quote == Quote.MultiLineDouble ||
      quote == Quote.RawMultiLineSingle ||
      quote == Quote.RawMultiLineDouble;
  if (isMultiLine) return 3;

  return internalError("Unhandled string quote: $quote");
}
| 122 |
/// Strips the opening delimiter from [first] and unescapes what remains
/// according to [quote].
String unescapeFirstStringPart(String first, Quote quote) {
  int contentStart = firstQuoteLength(first, quote);
  String content = first.substring(contentStart);
  return unescape(content, quote);
}
| 126 |
/// Strips the closing delimiter from [last] and unescapes what precedes it
/// according to [quote].
String unescapeLastStringPart(String last, Quote quote) {
  int contentEnd = last.length - lastQuoteLength(quote);
  String content = last.substring(0, contentEnd);
  return unescape(content, quote);
}
| 131 |
/// Unescapes a complete string literal: determines its quoting style, removes
/// the opening and closing delimiters, and unescapes the content in between.
String unescapeString(String string) {
  Quote quote = analyzeQuote(string);
  int contentStart = firstQuoteLength(string, quote);
  int contentEnd = string.length - lastQuoteLength(quote);
  String content = string.substring(contentStart, contentEnd);
  return unescape(content, quote);
}
| 138 |
/// Unescapes [string], which must already have its delimiters removed.
///
/// Raw quoting styles return [string] untouched. For every other style the
/// string is returned as is when it contains no backslash; otherwise its code
/// units are decoded by [unescapeCodeUnits].
String unescape(String string, Quote quote) {
  bool isRaw = quote == Quote.RawSingle ||
      quote == Quote.RawDouble ||
      quote == Quote.RawMultiLineSingle ||
      quote == Quote.RawMultiLineDouble;
  if (isRaw) return string;

  // Fast path: without a backslash there is nothing to decode.
  if (string.contains("\\")) {
    return unescapeCodeUnits(string.codeUnits);
  }
  return string;
}
| 155 |
/// Message reported when the input ends in the middle of an escape sequence.
const String incompleteSequence = "Incomplete escape sequence.";

/// Message reported when a hex escape contains a character that is not a
/// hexadecimal digit.
const String invalidCharacter = "Invalid character in escape sequence.";

/// Message reported when an escape decodes to a value above 0x10FFFF.
const String invalidCodePoint = "Invalid code point.";
| 161 |
/// Decodes the backslash escape sequences in [codeUnits] and returns the
/// resulting string.
///
/// Supported escapes:
///
///  * `\n`, `\r`, `\f`, `\b`, `\t`, `\v` for the usual control characters,
///  * `\xXX` with exactly two hex digits,
///  * `\uXXXX` with exactly four hex digits,
///  * `\u{X...}` with one to six hex digits followed by `}`.
///
/// Any other escaped character is passed through unchanged. Malformed
/// sequences are reported through [inputError] with one of
/// [incompleteSequence], [invalidCharacter] or [invalidCodePoint].
///
/// Note: based on
/// [StringValidator.validateString](pkg/compiler/lib/src/string_validator.dart).
String unescapeCodeUnits(List<int> codeUnits) {
  // Can't use Uint8List or Uint16List here, the code units may be larger.
  // One slot is written per input code unit or per escape sequence, so the
  // input length is an upper bound for the output length.
  List<int> result = new List<int>.filled(codeUnits.length, 0);
  int resultOffset = 0;
  // [offset] is currently not forwarded: [inputError] is called without
  // location information. NOTE(review): [inputError] presumably throws, so the
  // `return error(...)` statements below never yield a value - confirm.
  dynamic error(int offset, String message) {
    inputError(null, null, message);
  }

  for (int i = 0; i < codeUnits.length; i++) {
    int code = codeUnits[i];
    if (code == $BACKSLASH) {
      if (codeUnits.length == ++i) return error(i, incompleteSequence);
      code = codeUnits[i];

      /// `\n` for newline, equivalent to `\x0A`.
      /// `\r` for carriage return, equivalent to `\x0D`.
      /// `\f` for form feed, equivalent to `\x0C`.
      /// `\b` for backspace, equivalent to `\x08`.
      /// `\t` for tab, equivalent to `\x09`.
      /// `\v` for vertical tab, equivalent to `\x0B`.
      /// `\xXX` for hex escape.
      /// `\uXXXX` or `\u{XX?X?X?X?X?}` for Unicode hex escape.
      if (code == $n) {
        code = $LF;
      } else if (code == $r) {
        code = $CR;
      } else if (code == $f) {
        code = $FF;
      } else if (code == $b) {
        code = $BS;
      } else if (code == $t) {
        code = $TAB;
      } else if (code == $v) {
        code = $VTAB;
      } else if (code == $x) {
        // Expect exactly 2 hex digits.
        if (codeUnits.length <= i + 2) return error(i, incompleteSequence);
        code = 0;
        for (int j = 0; j < 2; j++) {
          int digit = codeUnits[++i];
          if (!isHexDigit(digit)) return error(i, invalidCharacter);
          code = (code << 4) + hexDigitValue(digit);
        }
      } else if (code == $u) {
        if (codeUnits.length == i + 1) return error(i, incompleteSequence);
        code = codeUnits[i + 1];
        if (code == $OPEN_CURLY_BRACKET) {
          // Expect 1-6 hex digits followed by '}'.
          i++; // Step onto the '{' that was just peeked at.
          code = 0;
          for (int j = 0; j < 7; j++) {
            if (codeUnits.length == ++i) return error(i, incompleteSequence);
            int digit = codeUnits[i];
            if (j != 0 && digit == $CLOSE_CURLY_BRACKET) break;
            // After six digits only the closing brace is acceptable; without
            // this check a seventh hex digit would be consumed and the loop
            // would end without ever seeing '}'.
            if (j == 6 || !isHexDigit(digit)) {
              return error(i, invalidCharacter);
            }
            code = (code << 4) + hexDigitValue(digit);
          }
        } else {
          // Expect exactly 4 hex digits. `i` is still on the 'u', so the
          // digits live at i + 1 .. i + 4.
          if (codeUnits.length <= i + 4) return error(i, incompleteSequence);
          code = 0;
          for (int j = 0; j < 4; j++) {
            int digit = codeUnits[++i];
            if (!isHexDigit(digit)) return error(i, invalidCharacter);
            code = (code << 4) + hexDigitValue(digit);
          }
        }
      } else {
        // Nothing, escaped character is passed through;
      }
      if (code > 0x10FFFF) return error(i, invalidCodePoint);
    }
    result[resultOffset++] = code;
  }
  return new String.fromCharCodes(result, 0, resultOffset);
}
OLD | NEW |